path: root/contrib/llvm-project/llvm/lib/CodeGen
author    Dimitry Andric <dim@FreeBSD.org>  2021-06-13 19:31:46 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2021-06-13 19:37:19 +0000
commit    e8d8bef961a50d4dc22501cde4fb9fb0be1b2532 (patch)
tree      94f04805f47bb7c59ae29690d8952b6074fff602  /contrib/llvm-project/llvm/lib/CodeGen
parent    bb130ff39747b94592cb26d71b7cb097b9a4ea6b (diff)
parent    b60736ec1405bb0a8dd40989f67ef4c93da068ab (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen')
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.cpp | 17
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.h | 128
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp | 61
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp | 79
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp | 14
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.h | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 589
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 49
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 42
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h | 8
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 176
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h | 11
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp | 90
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp | 11
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.h | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp | 185
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp | 76
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 7
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 86
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 12
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 482
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 64
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h | 14
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 46
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h | 14
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp | 12
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp | 10
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 282
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h | 34
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp | 397
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h | 43
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 7
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp | 84
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h | 53
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp | 11
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.h | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp | 53
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp | 56
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp | 17
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp (renamed from contrib/llvm-project/llvm/lib/CodeGen/BBSectionsPrepare.cpp) | 159
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp | 16
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp | 40
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp | 32
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp | 289
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp | 23
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp | 6
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/CodeGenPassBuilder.cpp | 25
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp | 387
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp | 96
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp | 21
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/DetectDeadLanes.cpp | 5
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp | 232
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp | 106
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp | 128
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp | 362
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp | 5
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp | 28
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp | 34
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 624
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp | 5
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 1974
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp | 249
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 894
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp | 6
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp | 14
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp | 24
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp | 10
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 1595
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp | 6
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp | 23
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 145
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp | 390
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp | 5
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp | 51
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp | 7
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp | 258
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp | 107
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.cpp | 10
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.h | 18
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp | 119
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp | 17
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp | 63
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LexicalScopes.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp | 3363
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp | 97
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h | 32
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp (renamed from contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues.cpp) | 224
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp | 71
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp | 23
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp | 6
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp | 16
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp | 79
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp | 29
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp | 52
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveRegUnits.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp | 61
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp | 11
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp | 16
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp | 12
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp | 15
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp | 11
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp | 34
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 37
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp | 137
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp | 14
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp | 244
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp | 15
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp | 84
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp | 48
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineCheckDebugify.cpp | 126
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp | 72
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp | 126
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp | 32
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp | 125
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp | 155
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp | 176
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp | 169
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp | 56
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp | 11
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp | 27
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp | 30
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp | 21
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp | 121
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp | 51
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp | 25
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp | 11
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp | 256
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp | 290
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp | 194
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp | 28
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp | 412
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MultiHazardRecognizer.cpp | 92
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp | 74
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/PHIEliminationUtils.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp | 165
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp | 8
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp | 40
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/PseudoProbeInserter.cpp | 95
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp | 5
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp | 151
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RDFRegisters.cpp | 35
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp | 186
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp | 34
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp | 51
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp | 1486
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp | 537
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp | 117
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp | 13
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp | 270
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.h | 14
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp | 80
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp | 43
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp | 10
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp | 35
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp | 9
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp | 911
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 15
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2359
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 247
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 10
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 103
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h | 12
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 551
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 289
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 396
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 17
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 62
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 11
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 25
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 1082
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 17
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 31
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 946
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp | 31
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 771
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 22
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 53
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 131
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 322
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 1042
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp | 13
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp | 78
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h | 13
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp | 56
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp | 180
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp | 22
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp | 17
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp | 11
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp | 44
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp | 272
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 443
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp | 8
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp | 303
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp | 53
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp | 11
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp | 262
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp | 12
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp | 51
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp | 17
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp | 31
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp | 8
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp | 12
228 files changed, 24456 insertions, 9720 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.cpp
index c99800659bfd..2aef1234ac0e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.cpp
@@ -26,17 +26,15 @@ using namespace llvm;
#define DEBUG_TYPE "regalloc"
// Compare VirtRegMap::getRegAllocPref().
-AllocationOrder::AllocationOrder(unsigned VirtReg,
- const VirtRegMap &VRM,
- const RegisterClassInfo &RegClassInfo,
- const LiveRegMatrix *Matrix)
- : Pos(0), HardHints(false) {
+AllocationOrder AllocationOrder::create(unsigned VirtReg, const VirtRegMap &VRM,
+ const RegisterClassInfo &RegClassInfo,
+ const LiveRegMatrix *Matrix) {
const MachineFunction &MF = VRM.getMachineFunction();
const TargetRegisterInfo *TRI = &VRM.getTargetRegInfo();
- Order = RegClassInfo.getOrder(MF.getRegInfo().getRegClass(VirtReg));
- if (TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM, Matrix))
- HardHints = true;
- rewind();
+ auto Order = RegClassInfo.getOrder(MF.getRegInfo().getRegClass(VirtReg));
+ SmallVector<MCPhysReg, 16> Hints;
+ bool HardHints =
+ TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM, Matrix);
LLVM_DEBUG({
if (!Hints.empty()) {
@@ -51,4 +49,5 @@ AllocationOrder::AllocationOrder(unsigned VirtReg,
assert(is_contained(Order, Hints[I]) &&
"Target hint is outside allocation order.");
#endif
+ return AllocationOrder(std::move(Hints), Order, HardHints);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.h b/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.h
index fa0690ab4ea5..0701e6810100 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AllocationOrder.h
@@ -17,9 +17,9 @@
#define LLVM_LIB_CODEGEN_ALLOCATIONORDER_H
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/MC/MCRegister.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Register.h"
namespace llvm {
@@ -28,67 +28,95 @@ class VirtRegMap;
class LiveRegMatrix;
class LLVM_LIBRARY_VISIBILITY AllocationOrder {
- SmallVector<MCPhysReg, 16> Hints;
+ const SmallVector<MCPhysReg, 16> Hints;
ArrayRef<MCPhysReg> Order;
- int Pos;
-
- // If HardHints is true, *only* Hints will be returned.
- bool HardHints;
+ // How far into the Order we can iterate. This is 0 if the AllocationOrder is
+ // constructed with HardHints = true, Order.size() otherwise. While
+ // technically a size_t, it will participate in comparisons with the
+ // Iterator's Pos, which must be signed, so it's typed here as signed, too, to
+ // avoid warnings and under the assumption that the size of Order is
+ // relatively small.
+ // IterationLimit defines an invalid iterator position.
+ const int IterationLimit;
public:
+ /// Forward iterator for an AllocationOrder.
+ class Iterator final {
+ const AllocationOrder &AO;
+ int Pos = 0;
+
+ public:
+ Iterator(const AllocationOrder &AO, int Pos) : AO(AO), Pos(Pos) {}
+
+ /// Return true if the curent position is that of a preferred register.
+ bool isHint() const { return Pos < 0; }
+
+ /// Return the next physical register in the allocation order.
+ MCRegister operator*() const {
+ if (Pos < 0)
+ return AO.Hints.end()[Pos];
+ assert(Pos < AO.IterationLimit);
+ return AO.Order[Pos];
+ }
+
+ /// Advance the iterator to the next position. If that's past the Hints
+ /// list, advance to the first value that's not also in the Hints list.
+ Iterator &operator++() {
+ if (Pos < AO.IterationLimit)
+ ++Pos;
+ while (Pos >= 0 && Pos < AO.IterationLimit && AO.isHint(AO.Order[Pos]))
+ ++Pos;
+ return *this;
+ }
+
+ bool operator==(const Iterator &Other) const {
+ assert(&AO == &Other.AO);
+ return Pos == Other.Pos;
+ }
+
+ bool operator!=(const Iterator &Other) const { return !(*this == Other); }
+ };
/// Create a new AllocationOrder for VirtReg.
/// @param VirtReg Virtual register to allocate for.
/// @param VRM Virtual register map for function.
/// @param RegClassInfo Information about reserved and allocatable registers.
- AllocationOrder(unsigned VirtReg,
- const VirtRegMap &VRM,
- const RegisterClassInfo &RegClassInfo,
- const LiveRegMatrix *Matrix);
-
- /// Get the allocation order without reordered hints.
- ArrayRef<MCPhysReg> getOrder() const { return Order; }
-
- /// Return the next physical register in the allocation order, or 0.
- /// It is safe to call next() again after it returned 0, it will keep
- /// returning 0 until rewind() is called.
- unsigned next(unsigned Limit = 0) {
- if (Pos < 0)
- return Hints.end()[Pos++];
- if (HardHints)
- return 0;
- if (!Limit)
- Limit = Order.size();
- while (Pos < int(Limit)) {
- unsigned Reg = Order[Pos++];
- if (!isHint(Reg))
- return Reg;
- }
- return 0;
+ static AllocationOrder create(unsigned VirtReg, const VirtRegMap &VRM,
+ const RegisterClassInfo &RegClassInfo,
+ const LiveRegMatrix *Matrix);
+
+ /// Create an AllocationOrder given the Hints, Order, and HardHints values.
+ /// Use the create method above - the ctor is for unittests.
+ AllocationOrder(SmallVector<MCPhysReg, 16> &&Hints, ArrayRef<MCPhysReg> Order,
+ bool HardHints)
+ : Hints(std::move(Hints)), Order(Order),
+ IterationLimit(HardHints ? 0 : static_cast<int>(Order.size())) {}
+
+ Iterator begin() const {
+ return Iterator(*this, -(static_cast<int>(Hints.size())));
}
- /// As next(), but allow duplicates to be returned, and stop before the
- /// Limit'th register in the RegisterClassInfo allocation order.
- ///
- /// This can produce more than Limit registers if there are hints.
- unsigned nextWithDups(unsigned Limit) {
- if (Pos < 0)
- return Hints.end()[Pos++];
- if (HardHints)
- return 0;
- if (Pos < int(Limit))
- return Order[Pos++];
- return 0;
- }
+ Iterator end() const { return Iterator(*this, IterationLimit); }
- /// Start over from the beginning.
- void rewind() { Pos = -int(Hints.size()); }
+ Iterator getOrderLimitEnd(unsigned OrderLimit) const {
+ assert(OrderLimit <= Order.size());
+ if (OrderLimit == 0)
+ return end();
+ Iterator Ret(*this,
+ std::min(static_cast<int>(OrderLimit) - 1, IterationLimit));
+ return ++Ret;
+ }
- /// Return true if the last register returned from next() was a preferred register.
- bool isHint() const { return Pos <= 0; }
+ /// Get the allocation order without reordered hints.
+ ArrayRef<MCPhysReg> getOrder() const { return Order; }
- /// Return true if PhysReg is a preferred register.
- bool isHint(unsigned PhysReg) const { return is_contained(Hints, PhysReg); }
+ /// Return true if Reg is a preferred physical register.
+ bool isHint(Register Reg) const {
+ assert(!Reg.isPhysical() ||
+ Reg.id() <
+ static_cast<uint32_t>(std::numeric_limits<MCPhysReg>::max()));
+ return Reg.isPhysical() && is_contained(Hints, Reg.id());
+ }
};
} // end namespace llvm
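
The old stateful next()/rewind() protocol becomes plain iteration: create() computes Hints and Order before the object exists, so they can be stored as const members, and iterator position replaces the mutable Pos. A minimal usage sketch, assuming a register-allocator caller with VirtReg, VRM, RegClassInfo and Matrix in scope (tryAssign is a hypothetical stand-in for the allocator's assignment logic, not part of this commit):

  auto Order = AllocationOrder::create(VirtReg, VRM, RegClassInfo, Matrix);
  for (auto I = Order.begin(), E = Order.end(); I != E; ++I) {
    MCRegister PhysReg = *I;
    // Hints are visited first (I.isHint() is true there); with HardHints,
    // end() stops after the hints and the class order is never visited.
    if (tryAssign(PhysReg, I.isHint()))
      break;
  }

Bounded walks that previously used next(Limit) instead swap the end iterator for Order.getOrderLimitEnd(Limit).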
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
index 7da28ffec85c..ebeff1fec30b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
@@ -88,19 +88,25 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
uint64_t StartingOffset) {
// Given a struct type, recursively traverse the elements.
if (StructType *STy = dyn_cast<StructType>(Ty)) {
- const StructLayout *SL = DL.getStructLayout(STy);
+ // If the Offsets aren't needed, don't query the struct layout. This allows
+ // us to support structs with scalable vectors for operations that don't
+ // need offsets.
+ const StructLayout *SL = Offsets ? DL.getStructLayout(STy) : nullptr;
for (StructType::element_iterator EB = STy->element_begin(),
EI = EB,
EE = STy->element_end();
- EI != EE; ++EI)
+ EI != EE; ++EI) {
+ // Don't compute the element offset if we didn't get a StructLayout above.
+ uint64_t EltOffset = SL ? SL->getElementOffset(EI - EB) : 0;
ComputeValueVTs(TLI, DL, *EI, ValueVTs, MemVTs, Offsets,
- StartingOffset + SL->getElementOffset(EI - EB));
+ StartingOffset + EltOffset);
+ }
return;
}
// Given an array type, recursively traverse the elements.
if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
Type *EltTy = ATy->getElementType();
- uint64_t EltSize = DL.getTypeAllocSize(EltTy);
+ uint64_t EltSize = DL.getTypeAllocSize(EltTy).getFixedValue();
for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
ComputeValueVTs(TLI, DL, EltTy, ValueVTs, MemVTs, Offsets,
StartingOffset + i * EltSize);
@@ -131,16 +137,21 @@ void llvm::computeValueLLTs(const DataLayout &DL, Type &Ty,
uint64_t StartingOffset) {
// Given a struct type, recursively traverse the elements.
if (StructType *STy = dyn_cast<StructType>(&Ty)) {
- const StructLayout *SL = DL.getStructLayout(STy);
- for (unsigned I = 0, E = STy->getNumElements(); I != E; ++I)
+ // If the Offsets aren't needed, don't query the struct layout. This allows
+ // us to support structs with scalable vectors for operations that don't
+ // need offsets.
+ const StructLayout *SL = Offsets ? DL.getStructLayout(STy) : nullptr;
+ for (unsigned I = 0, E = STy->getNumElements(); I != E; ++I) {
+ uint64_t EltOffset = SL ? SL->getElementOffset(I) : 0;
computeValueLLTs(DL, *STy->getElementType(I), ValueTys, Offsets,
- StartingOffset + SL->getElementOffset(I));
+ StartingOffset + EltOffset);
+ }
return;
}
// Given an array type, recursively traverse the elements.
if (ArrayType *ATy = dyn_cast<ArrayType>(&Ty)) {
Type *EltTy = ATy->getElementType();
- uint64_t EltSize = DL.getTypeAllocSize(EltTy);
+ uint64_t EltSize = DL.getTypeAllocSize(EltTy).getFixedValue();
for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
computeValueLLTs(DL, *EltTy, ValueTys, Offsets,
StartingOffset + i * EltSize);
@@ -174,27 +185,6 @@ GlobalValue *llvm::ExtractTypeInfo(Value *V) {
return GV;
}
-/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
-/// processed uses a memory 'm' constraint.
-bool
-llvm::hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos,
- const TargetLowering &TLI) {
- for (unsigned i = 0, e = CInfos.size(); i != e; ++i) {
- InlineAsm::ConstraintInfo &CI = CInfos[i];
- for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) {
- TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]);
- if (CType == TargetLowering::C_Memory)
- return true;
- }
-
- // Indirect operand accesses access memory.
- if (CI.isIndirect)
- return true;
- }
-
- return false;
-}
-
/// getFCmpCondCode - Return the ISD condition code corresponding to
/// the given LLVM IR floating-point condition code. This includes
/// consideration of global floating-point math flags.
@@ -537,11 +527,15 @@ bool llvm::isInTailCallPosition(const CallBase &Call, const TargetMachine &TM) {
// Debug info intrinsics do not get in the way of tail call optimization.
if (isa<DbgInfoIntrinsic>(BBI))
continue;
- // A lifetime end or assume intrinsic should not stop tail call
- // optimization.
+ // Pseudo probe intrinsics do not block tail call optimization either.
+ if (isa<PseudoProbeInst>(BBI))
+ continue;
+ // A lifetime end, assume or noalias.decl intrinsic should not stop tail
+ // call optimization.
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(BBI))
if (II->getIntrinsicID() == Intrinsic::lifetime_end ||
- II->getIntrinsicID() == Intrinsic::assume)
+ II->getIntrinsicID() == Intrinsic::assume ||
+ II->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl)
continue;
if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
!isSafeToSpeculativelyExecute(&*BBI))
@@ -739,8 +733,7 @@ static void collectEHScopeMembers(
if (Visiting->isEHScopeReturnBlock())
continue;
- for (const MachineBasicBlock *Succ : Visiting->successors())
- Worklist.push_back(Succ);
+ append_range(Worklist, Visiting->successors());
}
}
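
The nullptr-Offsets fast path above is what makes structs containing scalable vectors workable: such structs have no fixed StructLayout, so callers that only need the value types must avoid the layout query entirely. A sketch of such a call, assuming TLI, DL and a struct Type *Ty are in scope:

  SmallVector<EVT, 4> ValueVTs;
  // Offsets defaults to nullptr, so DL.getStructLayout() is never queried
  // and scalable-vector struct members are accepted.
  ComputeValueVTs(TLI, DL, Ty, ValueVTs);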
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
new file mode 100644
index 000000000000..95d878e65be4
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
@@ -0,0 +1,79 @@
+//===-- CodeGen/AsmPrinter/AIXException.cpp - AIX Exception Impl ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing AIX exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/MC/MCSectionXCOFF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+AIXException::AIXException(AsmPrinter *A) : DwarfCFIExceptionBase(A) {}
+
+void AIXException::emitExceptionInfoTable(const MCSymbol *LSDA,
+ const MCSymbol *PerSym) {
+ // Generate EH Info Table.
+ // The EH Info Table, aka the 'compat unwind section' on AIX, has the
+ // following format: struct eh_info_t {
+ // unsigned version; /* EH info version 0 */
+ // #if defined(__64BIT__)
+ // char _pad[4]; /* padding */
+ // #endif
+ // unsigned long lsda; /* Pointer to LSDA */
+ // unsigned long personality; /* Pointer to the personality routine */
+ // }
+
+ Asm->OutStreamer->SwitchSection(
+ Asm->getObjFileLowering().getCompactUnwindSection());
+ MCSymbol *EHInfoLabel =
+ TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(Asm->MF);
+ Asm->OutStreamer->emitLabel(EHInfoLabel);
+
+ // Version number.
+ Asm->emitInt32(0);
+
+ const DataLayout &DL = MMI->getModule()->getDataLayout();
+ const unsigned PointerSize = DL.getPointerSize();
+
+ // Add the necessary padding in 64-bit mode.
+ Asm->OutStreamer->emitValueToAlignment(PointerSize);
+
+ // LSDA location.
+ Asm->OutStreamer->emitValue(MCSymbolRefExpr::create(LSDA, Asm->OutContext),
+ PointerSize);
+
+ // Personality routine.
+ Asm->OutStreamer->emitValue(MCSymbolRefExpr::create(PerSym, Asm->OutContext),
+ PointerSize);
+}
+
+void AIXException::endFunction(const MachineFunction *MF) {
+ if (!TargetLoweringObjectFileXCOFF::ShouldEmitEHBlock(MF))
+ return;
+
+ const MCSymbol *LSDALabel = emitExceptionTable();
+
+ const Function &F = MF->getFunction();
+ assert(F.hasPersonalityFn() &&
+ "Landingpads are presented, but no personality routine is found.");
+ const Function *Per =
+ dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts());
+ const MCSymbol *PerSym = Asm->TM.getSymbol(Per);
+
+ emitExceptionInfoTable(LSDALabel, PerSym);
+}
+
+} // End of namespace llvm
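
For reference, the table emitted above works out to 4 (version) + 4 (pad) + 8 (lsda) + 8 (personality) = 24 bytes in 64-bit mode; in 32-bit mode the stream is already pointer-aligned after the 4-byte version field, so emitValueToAlignment emits nothing and the table is 4 + 4 + 4 = 12 bytes, matching the eh_info_t layout in the comment.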
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
index dea0227f7578..4e45a0ffc60f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -190,7 +190,6 @@ public:
template <typename DataT>
class Dwarf5AccelTableWriter : public AccelTableWriter {
struct Header {
- uint32_t UnitLength = 0;
uint16_t Version = 5;
uint16_t Padding = 0;
uint32_t CompUnitCount;
@@ -271,7 +270,7 @@ void AccelTableWriter::emitOffsets(const MCSymbol *Base) const {
continue;
PrevHash = HashValue;
Asm->OutStreamer->AddComment("Offset in Bucket " + Twine(i));
- Asm->emitLabelDifference(Hash->Sym, Base, sizeof(uint32_t));
+ Asm->emitLabelDifference(Hash->Sym, Base, Asm->getDwarfOffsetByteSize());
}
}
}
@@ -367,9 +366,8 @@ void Dwarf5AccelTableWriter<DataT>::Header::emit(
assert(CompUnitCount > 0 && "Index must have at least one CU.");
AsmPrinter *Asm = Ctx.Asm;
- Asm->OutStreamer->AddComment("Header: unit length");
- Asm->emitLabelDifference(Ctx.ContributionEnd, Ctx.ContributionStart,
- sizeof(uint32_t));
+ Asm->emitDwarfUnitLength(Ctx.ContributionEnd, Ctx.ContributionStart,
+ "Header: unit length");
Asm->OutStreamer->emitLabel(Ctx.ContributionStart);
Asm->OutStreamer->AddComment("Header: version");
Asm->emitInt16(Version);
@@ -506,7 +504,7 @@ template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emitData() const {
for (const auto *Value : Hash->Values)
emitEntry(*static_cast<const DataT *>(Value));
Asm->OutStreamer->AddComment("End of list: " + Hash->Name.getString());
- Asm->emitInt32(0);
+ Asm->emitInt8(0);
}
}
}
@@ -593,10 +591,14 @@ void llvm::emitDWARF5AccelTable(
}
void AppleAccelTableOffsetData::emit(AsmPrinter *Asm) const {
+ assert(Die.getDebugSectionOffset() <= UINT32_MAX &&
+ "The section offset exceeds the limit.");
Asm->emitInt32(Die.getDebugSectionOffset());
}
void AppleAccelTableTypeData::emit(AsmPrinter *Asm) const {
+ assert(Die.getDebugSectionOffset() <= UINT32_MAX &&
+ "The section offset exceeds the limit.");
Asm->emitInt32(Die.getDebugSectionOffset());
Asm->emitInt16(Die.getTag());
Asm->emitInt8(0);
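
Two details in this file's hunks: emitDwarfUnitLength abstracts the unit-length field (hence the removed UnitLength member) so the name index can also be emitted in DWARF64 form, which is why emitOffsets now writes offsets with getDwarfOffsetByteSize() instead of a hard-coded 4 bytes; and the end-of-list marker shrinks from emitInt32(0) to emitInt8(0) because a DWARF 5 index entry list is terminated by a single zero byte (a ULEB128 abbreviation code of 0), not a 4-byte word.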
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
index 883aaf5aefc4..3df8e35accc4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
@@ -29,9 +29,7 @@ MCSymbol *AddressPool::emitHeader(AsmPrinter &Asm, MCSection *Section) {
MCSymbol *BeginLabel = Asm.createTempSymbol(Prefix + "start");
MCSymbol *EndLabel = Asm.createTempSymbol(Prefix + "end");
- Asm.OutStreamer->AddComment("Length of contribution");
- Asm.emitLabelDifference(EndLabel, BeginLabel,
- 4); // TODO: Support DWARF64 format.
+ Asm.emitDwarfUnitLength(EndLabel, BeginLabel, "Length of contribution");
Asm.OutStreamer->emitLabel(BeginLabel);
Asm.OutStreamer->AddComment("DWARF version number");
Asm.emitInt16(Asm.getDwarfVersion());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.h
index f92cf72093ca..f1edc6c330d5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.h
@@ -48,7 +48,7 @@ public:
bool hasBeenUsed() const { return HasBeenUsed; }
- void resetUsedFlag() { HasBeenUsed = false; }
+ void resetUsedFlag(bool HasBeenUsed = false) { this->HasBeenUsed = HasBeenUsed; }
MCSymbol *getLabel() { return AddressTableBaseSym; }
void setLabel(MCSymbol *Sym) { AddressTableBaseSym = Sym; }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index c7eb0257d71b..85754bf29d0c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -14,6 +14,7 @@
#include "CodeViewDebug.h"
#include "DwarfDebug.h"
#include "DwarfException.h"
+#include "PseudoProbePrinter.h"
#include "WasmException.h"
#include "WinCFGuard.h"
#include "WinException.h"
@@ -30,6 +31,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/Dwarf.h"
@@ -77,6 +79,7 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/PseudoProbe.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -131,17 +134,25 @@ using namespace llvm;
#define DEBUG_TYPE "asm-printer"
-static const char *const DWARFGroupName = "dwarf";
-static const char *const DWARFGroupDescription = "DWARF Emission";
-static const char *const DbgTimerName = "emit";
-static const char *const DbgTimerDescription = "Debug Info Emission";
-static const char *const EHTimerName = "write_exception";
-static const char *const EHTimerDescription = "DWARF Exception Writer";
-static const char *const CFGuardName = "Control Flow Guard";
-static const char *const CFGuardDescription = "Control Flow Guard";
-static const char *const CodeViewLineTablesGroupName = "linetables";
-static const char *const CodeViewLineTablesGroupDescription =
- "CodeView Line Tables";
+// FIXME: this option currently only applies to DWARF, and not CodeView, tables
+static cl::opt<bool>
+ DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden,
+ cl::desc("Disable debug info printing"));
+
+const char DWARFGroupName[] = "dwarf";
+const char DWARFGroupDescription[] = "DWARF Emission";
+const char DbgTimerName[] = "emit";
+const char DbgTimerDescription[] = "Debug Info Emission";
+const char EHTimerName[] = "write_exception";
+const char EHTimerDescription[] = "DWARF Exception Writer";
+const char CFGuardName[] = "Control Flow Guard";
+const char CFGuardDescription[] = "Control Flow Guard";
+const char CodeViewLineTablesGroupName[] = "linetables";
+const char CodeViewLineTablesGroupDescription[] = "CodeView Line Tables";
+const char PPTimerName[] = "emit";
+const char PPTimerDescription[] = "Pseudo Probe Emission";
+const char PPGroupName[] = "pseudo probe";
+const char PPGroupDescription[] = "Pseudo Probe Emission";
STATISTIC(EmittedInsts, "Number of machine instrs printed");
@@ -188,7 +199,8 @@ AsmPrinter::AsmPrinter(TargetMachine &tm, std::unique_ptr<MCStreamer> Streamer)
}
AsmPrinter::~AsmPrinter() {
- assert(!DD && Handlers.empty() && "Debug/EH info didn't get finalized");
+ assert(!DD && Handlers.size() == NumUserHandlers &&
+ "Debug/EH info didn't get finalized");
if (GCMetadataPrinters) {
gcp_map_type &GCMap = getGCMap(GCMetadataPrinters);
@@ -231,9 +243,11 @@ void AsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) {
}
void AsmPrinter::emitInitialRawDwarfLocDirective(const MachineFunction &MF) {
- assert(DD && "Dwarf debug file is not defined.");
- assert(OutStreamer->hasRawTextSupport() && "Expected assembly output mode.");
- (void)DD->emitInitialLocDirective(MF, /*CUID=*/0);
+ if (DD) {
+ assert(OutStreamer->hasRawTextSupport() &&
+ "Expected assembly output mode.");
+ (void)DD->emitInitialLocDirective(MF, /*CUID=*/0);
+ }
}
/// getCurrentSection() - Return the current section we are emitting to.
@@ -261,6 +275,9 @@ bool AsmPrinter::doInitialization(Module &M) {
OutStreamer->InitSections(false);
+ if (DisableDebugInfoPrinting)
+ MMI->setDebugInfoAvailability(false);
+
// Emit the version-min deployment target directive if needed.
//
// FIXME: If we end up with a collection of these sorts of Darwin-specific
@@ -296,6 +313,7 @@ bool AsmPrinter::doInitialization(Module &M) {
std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
TM.getTargetTriple().str(), TM.getTargetCPU(),
TM.getTargetFeatureString()));
+ assert(STI && "Unable to create subtarget info");
OutStreamer->AddComment("Start of file scope inline assembly");
OutStreamer->AddBlankLine();
emitInlineAsm(M.getModuleInlineAsm() + "\n",
@@ -313,14 +331,21 @@ bool AsmPrinter::doInitialization(Module &M) {
CodeViewLineTablesGroupDescription);
}
if (!EmitCodeView || M.getDwarfVersion()) {
- DD = new DwarfDebug(this, &M);
- DD->beginModule();
- Handlers.emplace_back(std::unique_ptr<DwarfDebug>(DD), DbgTimerName,
- DbgTimerDescription, DWARFGroupName,
- DWARFGroupDescription);
+ if (!DisableDebugInfoPrinting) {
+ DD = new DwarfDebug(this);
+ Handlers.emplace_back(std::unique_ptr<DwarfDebug>(DD), DbgTimerName,
+ DbgTimerDescription, DWARFGroupName,
+ DWARFGroupDescription);
+ }
}
}
+ if (M.getNamedMetadata(PseudoProbeDescMetadataName)) {
+ PP = new PseudoProbeHandler(this, &M);
+ Handlers.emplace_back(std::unique_ptr<PseudoProbeHandler>(PP), PPTimerName,
+ PPTimerDescription, PPGroupName, PPGroupDescription);
+ }
+
switch (MAI->getExceptionHandlingType()) {
case ExceptionHandling::SjLj:
case ExceptionHandling::DwarfCFI:
@@ -368,6 +393,9 @@ bool AsmPrinter::doInitialization(Module &M) {
case ExceptionHandling::Wasm:
ES = new WasmException(this);
break;
+ case ExceptionHandling::AIX:
+ ES = new AIXException(this);
+ break;
}
if (ES)
Handlers.emplace_back(std::unique_ptr<EHStreamer>(ES), EHTimerName,
@@ -379,6 +407,13 @@ bool AsmPrinter::doInitialization(Module &M) {
Handlers.emplace_back(std::make_unique<WinCFGuard>(this), CFGuardName,
CFGuardDescription, DWARFGroupName,
DWARFGroupDescription);
+
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
+ HI.TimerGroupDescription, TimePassesIsEnabled);
+ HI.Handler->beginModule(&M);
+ }
+
return false;
}
@@ -449,10 +484,8 @@ MCSymbol *AsmPrinter::getSymbolPreferLocal(const GlobalValue &GV) const {
if (TM.getTargetTriple().isOSBinFormatELF() && GV.canBenefitFromLocalAlias()) {
const Module &M = *GV.getParent();
if (TM.getRelocationModel() != Reloc::Static &&
- M.getPIELevel() == PIELevel::Default)
- if (GV.isDSOLocal() || (TM.getTargetTriple().isX86() &&
- GV.getParent()->noSemanticInterposition()))
- return getSymbolWithGlobalValueBase(&GV, "$local");
+ M.getPIELevel() == PIELevel::Default && GV.isDSOLocal())
+ return getSymbolWithGlobalValueBase(&GV, "$local");
}
return TM.getSymbol(&GV);
}
@@ -500,8 +533,8 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
GVSym->redefineIfPossible();
if (GVSym->isDefined() || GVSym->isVariable())
- report_fatal_error("symbol '" + Twine(GVSym->getName()) +
- "' is already defined");
+ OutContext.reportError(SMLoc(), "symbol '" + Twine(GVSym->getName()) +
+ "' is already defined");
if (MAI->hasDotTypeDotSizeDirective())
OutStreamer->emitSymbolAttribute(EmittedSym, MCSA_ELF_TypeObject);
@@ -812,13 +845,21 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
if ((Size = MI.getRestoreSize(TII))) {
CommentOS << *Size << "-byte Reload\n";
} else if ((Size = MI.getFoldedRestoreSize(TII))) {
- if (*Size)
- CommentOS << *Size << "-byte Folded Reload\n";
+ if (*Size) {
+ if (*Size == unsigned(MemoryLocation::UnknownSize))
+ CommentOS << "Unknown-size Folded Reload\n";
+ else
+ CommentOS << *Size << "-byte Folded Reload\n";
+ }
} else if ((Size = MI.getSpillSize(TII))) {
CommentOS << *Size << "-byte Spill\n";
} else if ((Size = MI.getFoldedSpillSize(TII))) {
- if (*Size)
- CommentOS << *Size << "-byte Folded Spill\n";
+ if (*Size) {
+ if (*Size == unsigned(MemoryLocation::UnknownSize))
+ CommentOS << "Unknown-size Folded Spill\n";
+ else
+ CommentOS << *Size << "-byte Folded Spill\n";
+ }
}
// Check for spill-induced copies
@@ -877,7 +918,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
// The second operand is only an offset if it's an immediate.
bool MemLoc = MI->isIndirectDebugValue();
- int64_t Offset = MemLoc ? MI->getOperand(1).getImm() : 0;
+ auto Offset = StackOffset::getFixed(MemLoc ? MI->getOperand(1).getImm() : 0);
const DIExpression *Expr = MI->getDebugExpression();
if (Expr->getNumElements()) {
OS << '[';
@@ -916,6 +957,8 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
} else if (MI->getDebugOperand(0).isTargetIndex()) {
auto Op = MI->getDebugOperand(0);
OS << "!target-index(" << Op.getIndex() << "," << Op.getOffset() << ")";
+ // NOTE: Want this comment at start of line, don't emit with AddComment.
+ AP.OutStreamer->emitRawComment(OS.str());
return true;
} else {
Register Reg;
@@ -941,7 +984,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
}
if (MemLoc)
- OS << '+' << Offset << ']';
+ OS << '+' << Offset.getFixed() << ']';
// NOTE: Want this comment at start of line, don't emit with AddComment.
AP.OutStreamer->emitRawComment(OS.str());
@@ -1023,6 +1066,56 @@ void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) {
MCConstantExpr::create(FrameOffset, OutContext));
}
+/// Returns the BB metadata to be emitted in the .llvm_bb_addr_map section for a
+/// given basic block. This can be used to capture more precise profile
+/// information. We use the last 3 bits (LSBs) to encode the following
+/// information:
+/// * (1): set if return block (ret or tail call).
+/// * (2): set if ends with a tail call.
+/// * (3): set if exception handling (EH) landing pad.
+/// The remaining bits are zero.
+static unsigned getBBAddrMapMetadata(const MachineBasicBlock &MBB) {
+ const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
+ return ((unsigned)MBB.isReturnBlock()) |
+ ((!MBB.empty() && TII->isTailCall(MBB.back())) << 1) |
+ (MBB.isEHPad() << 2);
+}
+
+void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
+ MCSection *BBAddrMapSection =
+ getObjFileLowering().getBBAddrMapSection(*MF.getSection());
+ assert(BBAddrMapSection && ".llvm_bb_addr_map section is not initialized.");
+
+ const MCSymbol *FunctionSymbol = getFunctionBegin();
+
+ OutStreamer->PushSection();
+ OutStreamer->SwitchSection(BBAddrMapSection);
+ OutStreamer->emitSymbolValue(FunctionSymbol, getPointerSize());
+ // Emit the total number of basic blocks in this function.
+ OutStreamer->emitULEB128IntValue(MF.size());
+ // Emit BB Information for each basic block in the function.
+ for (const MachineBasicBlock &MBB : MF) {
+ const MCSymbol *MBBSymbol =
+ MBB.isEntryBlock() ? FunctionSymbol : MBB.getSymbol();
+ // Emit the basic block offset.
+ emitLabelDifferenceAsULEB128(MBBSymbol, FunctionSymbol);
+ // Emit the basic block size. When BBs have alignments, their size cannot
+ // always be computed from their offsets.
+ emitLabelDifferenceAsULEB128(MBB.getEndSymbol(), MBBSymbol);
+ OutStreamer->emitULEB128IntValue(getBBAddrMapMetadata(MBB));
+ }
+ OutStreamer->PopSection();
+}
+
+void AsmPrinter::emitPseudoProbe(const MachineInstr &MI) {
+ auto GUID = MI.getOperand(0).getImm();
+ auto Index = MI.getOperand(1).getImm();
+ auto Type = MI.getOperand(2).getImm();
+ auto Attr = MI.getOperand(3).getImm();
+ DILocation *DebugLoc = MI.getDebugLoc();
+ PP->emitPseudoProbe(GUID, Index, Type, Attr, DebugLoc);
+}
+
void AsmPrinter::emitStackSizeSection(const MachineFunction &MF) {
if (!MF.getTarget().Options.EmitStackSizeSection)
return;
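
A consumer of the .llvm_bb_addr_map section would decode the per-block metadata emitted above along these lines (an illustrative sketch; the names are hypothetical, not part of this commit):

  struct BBEntryFlags {
    bool IsReturnBlock;  // bit 0: block ends in a return (incl. tail-call return)
    bool EndsInTailCall; // bit 1: block ends with a tail call
    bool IsEHPad;        // bit 2: exception-handling landing pad
  };

  static BBEntryFlags decodeBBAddrMapMetadata(unsigned Meta) {
    return {(Meta & 1u) != 0, (Meta & 2u) != 0, (Meta & 4u) != 0};
  }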
@@ -1069,8 +1162,6 @@ void AsmPrinter::emitFunctionBody() {
// Emit target-specific gunk before the function body.
emitFunctionBodyStart();
- bool ShouldPrintDebugScopes = MMI->hasDebugInfo();
-
if (isVerbose()) {
// Get MachineDominatorTree or compute it on the fly if it's unavailable
MDT = getAnalysisIfAvailable<MachineDominatorTree>();
@@ -1093,9 +1184,11 @@ void AsmPrinter::emitFunctionBody() {
bool HasAnyRealCode = false;
int NumInstsInFunction = 0;
+ bool CanDoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
for (auto &MBB : *MF) {
// Print a label for the basic block.
emitBasicBlockStart(MBB);
+ DenseMap<StringRef, unsigned> MnemonicCounts;
for (auto &MI : MBB) {
// Print the assembly for the instruction.
if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() &&
@@ -1108,13 +1201,10 @@ void AsmPrinter::emitFunctionBody() {
if (MCSymbol *S = MI.getPreInstrSymbol())
OutStreamer->emitLabel(S);
- if (ShouldPrintDebugScopes) {
- for (const HandlerInfo &HI : Handlers) {
- NamedRegionTimer T(HI.TimerName, HI.TimerDescription,
- HI.TimerGroupName, HI.TimerGroupDescription,
- TimePassesIsEnabled);
- HI.Handler->beginInstruction(&MI);
- }
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
+ HI.TimerGroupDescription, TimePassesIsEnabled);
+ HI.Handler->beginInstruction(&MI);
}
if (isVerbose())
@@ -1142,6 +1232,11 @@ void AsmPrinter::emitFunctionBody() {
emitInstruction(&MI);
}
break;
+ case TargetOpcode::DBG_INSTR_REF:
+ // This instruction reference will have been resolved to a machine
+ // location, and a nearby DBG_VALUE created. We can safely ignore
+ // the instruction reference.
+ break;
case TargetOpcode::DBG_LABEL:
if (isVerbose()) {
if (!emitDebugLabelComment(&MI, *this))
@@ -1154,8 +1249,18 @@ void AsmPrinter::emitFunctionBody() {
case TargetOpcode::KILL:
if (isVerbose()) emitKill(&MI, *this);
break;
+ case TargetOpcode::PSEUDO_PROBE:
+ emitPseudoProbe(MI);
+ break;
default:
emitInstruction(&MI);
+ if (CanDoExtraAnalysis) {
+ MCInst MCI;
+ MCI.setOpcode(MI.getOpcode());
+ auto Name = OutStreamer->getMnemonic(MCI);
+ auto I = MnemonicCounts.insert({Name, 0u});
+ I.first->second++;
+ }
break;
}
@@ -1163,54 +1268,69 @@ void AsmPrinter::emitFunctionBody() {
if (MCSymbol *S = MI.getPostInstrSymbol())
OutStreamer->emitLabel(S);
- if (ShouldPrintDebugScopes) {
- for (const HandlerInfo &HI : Handlers) {
- NamedRegionTimer T(HI.TimerName, HI.TimerDescription,
- HI.TimerGroupName, HI.TimerGroupDescription,
- TimePassesIsEnabled);
- HI.Handler->endInstruction();
- }
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
+ HI.TimerGroupDescription, TimePassesIsEnabled);
+ HI.Handler->endInstruction();
}
}
- // We need a temporary symbol for the end of this basic block, if either we
- // have BBLabels enabled and we want to emit size directive for the BBs, or
- // if this basic blocks marks the end of a section (except the section
- // containing the entry basic block as the end symbol for that section is
- // CurrentFnEnd).
- MCSymbol *CurrentBBEnd = nullptr;
- if ((MAI->hasDotTypeDotSizeDirective() && MF->hasBBLabels()) ||
- (MBB.isEndSection() && !MBB.sameSection(&MF->front()))) {
- CurrentBBEnd = OutContext.createTempSymbol();
- OutStreamer->emitLabel(CurrentBBEnd);
- }
+ // We must emit a temporary symbol for the end of this basic block if either
+ // we have BBLabels enabled or this basic block marks the end of a section
+ // (except the section containing the entry basic block, as the end symbol
+ // for that section is CurrentFnEnd).
+ if (MF->hasBBLabels() ||
+ (MAI->hasDotTypeDotSizeDirective() && MBB.isEndSection() &&
+ !MBB.sameSection(&MF->front())))
+ OutStreamer->emitLabel(MBB.getEndSymbol());
- // Helper for emitting the size directive associated with a basic block
- // symbol.
- auto emitELFSizeDirective = [&](MCSymbol *SymForSize) {
- assert(CurrentBBEnd && "Basicblock end symbol not set!");
- const MCExpr *SizeExp = MCBinaryExpr::createSub(
- MCSymbolRefExpr::create(CurrentBBEnd, OutContext),
- MCSymbolRefExpr::create(SymForSize, OutContext), OutContext);
- OutStreamer->emitELFSize(SymForSize, SizeExp);
- };
-
- // Emit size directive for the size of each basic block, if BBLabels is
- // enabled.
- if (MAI->hasDotTypeDotSizeDirective() && MF->hasBBLabels())
- emitELFSizeDirective(MBB.getSymbol());
-
- // Emit size directive for the size of each basic block section once we
- // get to the end of that section.
if (MBB.isEndSection()) {
+ // The size directive for the section containing the entry block is
+ // handled separately by the function section.
if (!MBB.sameSection(&MF->front())) {
- if (MAI->hasDotTypeDotSizeDirective())
- emitELFSizeDirective(CurrentSectionBeginSym);
+ if (MAI->hasDotTypeDotSizeDirective()) {
+ // Emit the size directive for the basic block section.
+ const MCExpr *SizeExp = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(MBB.getEndSymbol(), OutContext),
+ MCSymbolRefExpr::create(CurrentSectionBeginSym, OutContext),
+ OutContext);
+ OutStreamer->emitELFSize(CurrentSectionBeginSym, SizeExp);
+ }
MBBSectionRanges[MBB.getSectionIDNum()] =
- MBBSectionRange{CurrentSectionBeginSym, CurrentBBEnd};
+ MBBSectionRange{CurrentSectionBeginSym, MBB.getEndSymbol()};
}
}
emitBasicBlockEnd(MBB);
+
+ if (CanDoExtraAnalysis) {
+ // Skip empty blocks.
+ if (MBB.empty())
+ continue;
+
+ MachineOptimizationRemarkAnalysis R(DEBUG_TYPE, "InstructionMix",
+ MBB.begin()->getDebugLoc(), &MBB);
+
+ // Generate instruction mix remark. First, sort counts in descending order
+ // by count and name.
+ SmallVector<std::pair<StringRef, unsigned>, 128> MnemonicVec;
+ for (auto &KV : MnemonicCounts)
+ MnemonicVec.emplace_back(KV.first, KV.second);
+
+ sort(MnemonicVec, [](const std::pair<StringRef, unsigned> &A,
+ const std::pair<StringRef, unsigned> &B) {
+ if (A.second > B.second)
+ return true;
+ if (A.second == B.second)
+ return StringRef(A.first) < StringRef(B.first);
+ return false;
+ });
+ R << "BasicBlock: " << ore::NV("BasicBlock", MBB.getName()) << "\n";
+ for (auto &KV : MnemonicVec) {
+ auto Name = (Twine("INST_") + KV.first.trim()).str();
+ R << KV.first << ": " << ore::NV(Name, KV.second) << "\n";
+ }
+ ORE->emit(R);
+ }
}
EmittedInsts += NumInstsInFunction;
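
With a matching analysis-remarks filter (e.g. -pass-remarks-analysis=asm-printer, an assumed spelling based on DEBUG_TYPE being "asm-printer"; the flag itself is not shown in this diff), the loop above emits one InstructionMix remark per non-empty basic block, listing each mnemonic and its count sorted by descending count, then by name.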
@@ -1297,6 +1417,11 @@ void AsmPrinter::emitFunctionBody() {
HI.Handler->endFunction(MF);
}
+ // Emit section containing BB address offsets and their metadata, when
+ // BB labels are requested for this function.
+ if (MF->hasBBLabels())
+ emitBBAddrMapSection(*MF);
+
// Emit section containing stack size metadata.
emitStackSizeSection(*MF);
@@ -1390,16 +1515,7 @@ void AsmPrinter::emitGlobalGOTEquivs() {
void AsmPrinter::emitGlobalIndirectSymbol(Module &M,
const GlobalIndirectSymbol& GIS) {
MCSymbol *Name = getSymbol(&GIS);
-
- if (GIS.hasExternalLinkage() || !MAI->getWeakRefDirective())
- OutStreamer->emitSymbolAttribute(Name, MCSA_Global);
- else if (GIS.hasWeakLinkage() || GIS.hasLinkOnceLinkage())
- OutStreamer->emitSymbolAttribute(Name, MCSA_WeakReference);
- else
- assert(GIS.hasLocalLinkage() && "Invalid alias or ifunc linkage");
-
bool IsFunction = GIS.getValueType()->isFunctionTy();
-
// Treat bitcasts of functions as functions also. This is important at least
// on WebAssembly where object and function addresses can't alias each other.
if (!IsFunction)
@@ -1408,6 +1524,30 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M,
IsFunction =
CE->getOperand(0)->getType()->getPointerElementType()->isFunctionTy();
+ // AIX's assembly directive `.set` is not usable for aliasing purposes,
+ // so AIX has to use the extra-label-at-definition strategy. At this
+ // point, all the extra labels have been emitted; we just have to emit
+ // linkage for those labels.
+ if (TM.getTargetTriple().isOSBinFormatXCOFF()) {
+ assert(!isa<GlobalIFunc>(GIS) && "IFunc is not supported on AIX.");
+ assert(MAI->hasVisibilityOnlyWithLinkage() &&
+ "Visibility should be handled with emitLinkage() on AIX.");
+ emitLinkage(&GIS, Name);
+ // If it's a function, also emit linkage for aliases of function entry
+ // point.
+ if (IsFunction)
+ emitLinkage(&GIS,
+ getObjFileLowering().getFunctionEntryPointSymbol(&GIS, TM));
+ return;
+ }
+
+ if (GIS.hasExternalLinkage() || !MAI->getWeakRefDirective())
+ OutStreamer->emitSymbolAttribute(Name, MCSA_Global);
+ else if (GIS.hasWeakLinkage() || GIS.hasLinkOnceLinkage())
+ OutStreamer->emitSymbolAttribute(Name, MCSA_WeakReference);
+ else
+ assert(GIS.hasLocalLinkage() && "Invalid alias or ifunc linkage");
+
// Set the symbol type to function if the alias has a function type.
// This affects codegen when the aliasee is not a function.
if (IsFunction)
@@ -1517,9 +1657,8 @@ bool AsmPrinter::doFinalization(Module &M) {
// Variable `Name` is the function descriptor symbol (see above). Get the
// function entry point symbol.
MCSymbol *FnEntryPointSym = TLOF.getFunctionEntryPointSymbol(&F, TM);
- if (cast<MCSymbolXCOFF>(FnEntryPointSym)->hasRepresentedCsectSet())
- // Emit linkage for the function entry point.
- emitLinkage(&F, FnEntryPointSym);
+ // Emit linkage for the function entry point.
+ emitLinkage(&F, FnEntryPointSym);
// Emit linkage for the function descriptor.
emitLinkage(&F, Name);
@@ -1584,7 +1723,11 @@ bool AsmPrinter::doFinalization(Module &M) {
HI.TimerGroupDescription, TimePassesIsEnabled);
HI.Handler->endModule();
}
- Handlers.clear();
+
+ // This deletes all the ephemeral handlers that AsmPrinter added, while
+ // keeping all the user-added handlers alive until the AsmPrinter is
+ // destroyed.
+ Handlers.erase(Handlers.begin() + NumUserHandlers, Handlers.end());
DD = nullptr;
// If the target wants to know about weak references, print them all.
@@ -1668,51 +1811,6 @@ bool AsmPrinter::doFinalization(Module &M) {
if (MCSection *S = MAI->getNonexecutableStackSection(OutContext))
OutStreamer->SwitchSection(S);
- if (TM.getTargetTriple().isOSBinFormatCOFF()) {
- // Emit /EXPORT: flags for each exported global as necessary.
- const auto &TLOF = getObjFileLowering();
- std::string Flags;
-
- for (const GlobalValue &GV : M.global_values()) {
- raw_string_ostream OS(Flags);
- TLOF.emitLinkerFlagsForGlobal(OS, &GV);
- OS.flush();
- if (!Flags.empty()) {
- OutStreamer->SwitchSection(TLOF.getDrectveSection());
- OutStreamer->emitBytes(Flags);
- }
- Flags.clear();
- }
-
- // Emit /INCLUDE: flags for each used global as necessary.
- if (const auto *LU = M.getNamedGlobal("llvm.used")) {
- assert(LU->hasInitializer() &&
- "expected llvm.used to have an initializer");
- assert(isa<ArrayType>(LU->getValueType()) &&
- "expected llvm.used to be an array type");
- if (const auto *A = cast<ConstantArray>(LU->getInitializer())) {
- for (const Value *Op : A->operands()) {
- const auto *GV = cast<GlobalValue>(Op->stripPointerCasts());
- // Global symbols with internal or private linkage are not visible to
- // the linker, and thus would cause an error when the linker tried to
- // preserve the symbol due to the `/include:` directive.
- if (GV->hasLocalLinkage())
- continue;
-
- raw_string_ostream OS(Flags);
- TLOF.emitLinkerFlagsForUsed(OS, GV);
- OS.flush();
-
- if (!Flags.empty()) {
- OutStreamer->SwitchSection(TLOF.getDrectveSection());
- OutStreamer->emitBytes(Flags);
- }
- Flags.clear();
- }
- }
- }
- }
-
if (TM.Options.EmitAddrsig) {
// Emit address-significance attributes for all globals.
OutStreamer->emitAddrsig();
@@ -1756,10 +1854,11 @@ bool AsmPrinter::doFinalization(Module &M) {
return false;
}
-MCSymbol *AsmPrinter::getCurExceptionSym() {
- if (!CurExceptionSym)
- CurExceptionSym = createTempSymbol("exception");
- return CurExceptionSym;
+MCSymbol *AsmPrinter::getMBBExceptionSym(const MachineBasicBlock &MBB) {
+ auto Res = MBBSectionExceptionSyms.try_emplace(MBB.getSectionIDNum());
+ if (Res.second)
+ Res.first->second = createTempSymbol("exception");
+ return Res.first->second;
}
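// Aside: the try_emplace pattern above creates the per-section exception
// symbol lazily on first request. A standalone sketch of the same pattern
// with std::map (all names here are hypothetical):
#include <map>
#include <string>

std::string &getPerSectionSym(std::map<unsigned, std::string> &Syms,
                              unsigned SectionID) {
  auto Res = Syms.try_emplace(SectionID);
  if (Res.second) // first request for this section: create the symbol
    Res.first->second = "exception" + std::to_string(SectionID);
  return Res.first->second;
}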
void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
@@ -1786,13 +1885,13 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
CurrentFnBegin = nullptr;
CurrentSectionBeginSym = nullptr;
MBBSectionRanges.clear();
- CurExceptionSym = nullptr;
+ MBBSectionExceptionSyms.clear();
bool NeedsLocalForSize = MAI->needsLocalForSize();
if (F.hasFnAttribute("patchable-function-entry") ||
F.hasFnAttribute("function-instrument") ||
F.hasFnAttribute("xray-instruction-threshold") ||
needFuncLabelsForEHOrDebugInfo(MF) || NeedsLocalForSize ||
- MF.getTarget().Options.EmitStackSizeSection) {
+ MF.getTarget().Options.EmitStackSizeSection || MF.hasBBLabels()) {
CurrentFnBegin = createTempSymbol("func_begin");
if (NeedsLocalForSize)
CurrentFnSymForSize = CurrentFnBegin;
@@ -1882,8 +1981,7 @@ void AsmPrinter::emitConstantPool() {
unsigned NewOffset = alignTo(Offset, CPE.getAlign());
OutStreamer->emitZeros(NewOffset - Offset);
- Type *Ty = CPE.getType();
- Offset = NewOffset + getDataLayout().getTypeAllocSize(Ty);
+ Offset = NewOffset + CPE.getSizeInBytes(getDataLayout());
OutStreamer->emitLabel(Sym);
if (CPE.isMachineConstantPoolEntry())
@@ -2083,47 +2181,50 @@ void AsmPrinter::emitLLVMUsedList(const ConstantArray *InitList) {
}
}
-namespace {
-
-struct Structor {
- int Priority = 0;
- Constant *Func = nullptr;
- GlobalValue *ComdatKey = nullptr;
-
- Structor() = default;
-};
-
-} // end anonymous namespace
-
-/// EmitXXStructorList - Emit the ctor or dtor list taking into account the init
-/// priority.
-void AsmPrinter::emitXXStructorList(const DataLayout &DL, const Constant *List,
- bool isCtor) {
- // Should be an array of '{ i32, void ()*, i8* }' structs. The first value is the
- // init priority.
- if (!isa<ConstantArray>(List)) return;
+void AsmPrinter::preprocessXXStructorList(const DataLayout &DL,
+ const Constant *List,
+ SmallVector<Structor, 8> &Structors) {
+ // Should be an array of '{ i32, void ()*, i8* }' structs. The first value is
+ // the init priority.
+ if (!isa<ConstantArray>(List))
+ return;
// Gather the structors in a form that's convenient for sorting by priority.
- SmallVector<Structor, 8> Structors;
for (Value *O : cast<ConstantArray>(List)->operands()) {
auto *CS = cast<ConstantStruct>(O);
if (CS->getOperand(1)->isNullValue())
- break; // Found a null terminator, skip the rest.
+ break; // Found a null terminator, skip the rest.
ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0));
- if (!Priority) continue; // Malformed.
+ if (!Priority)
+ continue; // Malformed.
Structors.push_back(Structor());
Structor &S = Structors.back();
S.Priority = Priority->getLimitedValue(65535);
S.Func = CS->getOperand(1);
- if (!CS->getOperand(2)->isNullValue())
+ if (!CS->getOperand(2)->isNullValue()) {
+ if (TM.getTargetTriple().isOSAIX())
+ llvm::report_fatal_error(
+ "associated data of XXStructor list is not yet supported on AIX");
S.ComdatKey =
dyn_cast<GlobalValue>(CS->getOperand(2)->stripPointerCasts());
+ }
}
// Emit the function pointers in the target-specific order
llvm::stable_sort(Structors, [](const Structor &L, const Structor &R) {
return L.Priority < R.Priority;
});
+}
+
+/// EmitXXStructorList - Emit the ctor or dtor list taking into account the init
+/// priority.
+void AsmPrinter::emitXXStructorList(const DataLayout &DL, const Constant *List,
+ bool IsCtor) {
+ SmallVector<Structor, 8> Structors;
+ preprocessXXStructorList(DL, List, Structors);
+ if (Structors.empty())
+ return;
+
const Align Align = DL.getPointerPrefAlignment();
for (Structor &S : Structors) {
const TargetLoweringObjectFile &Obj = getObjFileLowering();
@@ -2139,8 +2240,9 @@ void AsmPrinter::emitXXStructorList(const DataLayout &DL, const Constant *List,
KeySym = getSymbol(GV);
}
+
MCSection *OutputSection =
- (isCtor ? Obj.getStaticCtorSection(S.Priority, KeySym)
+ (IsCtor ? Obj.getStaticCtorSection(S.Priority, KeySym)
: Obj.getStaticDtorSection(S.Priority, KeySym));
OutStreamer->SwitchSection(OutputSection);
if (OutStreamer->getCurrentSection() != OutStreamer->getPreviousSection())
@@ -2274,12 +2376,25 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
return MCSymbolRefExpr::create(GetBlockAddressSymbol(BA), Ctx);
+ if (const auto *Equiv = dyn_cast<DSOLocalEquivalent>(CV))
+ return getObjFileLowering().lowerDSOLocalEquivalent(Equiv, TM);
+
const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
if (!CE) {
llvm_unreachable("Unknown constant value to lower!");
}
switch (CE->getOpcode()) {
+ case Instruction::AddrSpaceCast: {
+ const Constant *Op = CE->getOperand(0);
+ unsigned DstAS = CE->getType()->getPointerAddressSpace();
+ unsigned SrcAS = Op->getType()->getPointerAddressSpace();
+ if (TM.isNoopAddrSpaceCast(SrcAS, DstAS))
+ return lowerConstant(Op);
+
+ // Fallthrough to error.
+ LLVM_FALLTHROUGH;
+ }
default: {
// If the code isn't optimized, there may be outstanding folding
// opportunities. Attempt to fold the expression using DataLayout as a
@@ -2345,7 +2460,8 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
//
// If the pointer is larger than the resultant integer, then
// as with Trunc just depend on the assembler to truncate it.
- if (DL.getTypeAllocSize(Ty) <= DL.getTypeAllocSize(Op->getType()))
+ if (DL.getTypeAllocSize(Ty).getFixedSize() <=
+ DL.getTypeAllocSize(Op->getType()).getFixedSize())
return OpExpr;
// Otherwise the pointer is smaller than the resultant integer, mask off
@@ -2359,18 +2475,25 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
case Instruction::Sub: {
GlobalValue *LHSGV;
APInt LHSOffset;
+ DSOLocalEquivalent *DSOEquiv;
if (IsConstantOffsetFromGlobal(CE->getOperand(0), LHSGV, LHSOffset,
- getDataLayout())) {
+ getDataLayout(), &DSOEquiv)) {
GlobalValue *RHSGV;
APInt RHSOffset;
if (IsConstantOffsetFromGlobal(CE->getOperand(1), RHSGV, RHSOffset,
getDataLayout())) {
const MCExpr *RelocExpr =
getObjFileLowering().lowerRelativeReference(LHSGV, RHSGV, TM);
- if (!RelocExpr)
+ if (!RelocExpr) {
+ const MCExpr *LHSExpr =
+ MCSymbolRefExpr::create(getSymbol(LHSGV), Ctx);
+ if (DSOEquiv &&
+ getObjFileLowering().supportDSOLocalEquivalentLowering())
+ LHSExpr =
+ getObjFileLowering().lowerDSOLocalEquivalent(DSOEquiv, TM);
RelocExpr = MCBinaryExpr::createSub(
- MCSymbolRefExpr::create(getSymbol(LHSGV), Ctx),
- MCSymbolRefExpr::create(getSymbol(RHSGV), Ctx), Ctx);
+ LHSExpr, MCSymbolRefExpr::create(getSymbol(RHSGV), Ctx), Ctx);
+ }
int64_t Addend = (LHSOffset - RHSOffset).getSExtValue();
if (Addend != 0)
RelocExpr = MCBinaryExpr::createAdd(
@@ -3001,7 +3124,7 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
OS.indent(Loop->getLoopDepth()*2-2);
OS << "This ";
- if (Loop->empty())
+ if (Loop->isInnermost())
OS << "Inner ";
OS << "Loop Header: Depth=" + Twine(Loop->getLoopDepth()) << '\n';
@@ -3025,6 +3148,16 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
if (Alignment != Align(1))
emitAlignment(Alignment);
+ // Switch to a new section if this basic block must begin a section. The
+ // entry block is always placed in the function section and is handled
+ // separately.
+ if (MBB.isBeginSection() && !MBB.isEntryBlock()) {
+ OutStreamer->SwitchSection(
+ getObjFileLowering().getSectionForMachineBasicBlock(MF->getFunction(),
+ MBB, TM));
+ CurrentSectionBeginSym = MBB.getSymbol();
+ }
+
// If the block has its address taken, emit any labels that were used to
// reference the block. It is possible that there is more than one label
// here, because multiple LLVM BB's may have been RAUW'd to this block after
@@ -3055,33 +3188,25 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
emitBasicBlockLoopComments(MBB, MLI, *this);
}
- if (MBB.pred_empty() ||
- (!MF->hasBBLabels() && isBlockOnlyReachableByFallthrough(&MBB) &&
- !MBB.isEHFuncletEntry() && !MBB.hasLabelMustBeEmitted())) {
+ // Print the main label for the block.
+ if (shouldEmitLabelForBasicBlock(MBB)) {
+ if (isVerbose() && MBB.hasLabelMustBeEmitted())
+ OutStreamer->AddComment("Label of block must be emitted");
+ OutStreamer->emitLabel(MBB.getSymbol());
+ } else {
if (isVerbose()) {
// NOTE: Want this comment at start of line, don't emit with AddComment.
OutStreamer->emitRawComment(" %bb." + Twine(MBB.getNumber()) + ":",
false);
}
- } else {
- if (isVerbose() && MBB.hasLabelMustBeEmitted()) {
- OutStreamer->AddComment("Label of block must be emitted");
- }
- auto *BBSymbol = MBB.getSymbol();
- // Switch to a new section if this basic block must begin a section.
- if (MBB.isBeginSection()) {
- OutStreamer->SwitchSection(
- getObjFileLowering().getSectionForMachineBasicBlock(MF->getFunction(),
- MBB, TM));
- CurrentSectionBeginSym = BBSymbol;
- }
- OutStreamer->emitLabel(BBSymbol);
- // With BB sections, each basic block must handle CFI information on its own
- // if it begins a section.
- if (MBB.isBeginSection())
- for (const HandlerInfo &HI : Handlers)
- HI.Handler->beginBasicBlock(MBB);
}
+
+ // With BB sections, each basic block must handle CFI information on its own
+ // if it begins a section (Entry block is handled separately by
+ // AsmPrinterHandler::beginFunction).
+ if (MBB.isBeginSection() && !MBB.isEntryBlock())
+ for (const HandlerInfo &HI : Handlers)
+ HI.Handler->beginBasicBlock(MBB);
}
void AsmPrinter::emitBasicBlockEnd(const MachineBasicBlock &MBB) {
@@ -3113,15 +3238,26 @@ void AsmPrinter::emitVisibility(MCSymbol *Sym, unsigned Visibility,
OutStreamer->emitSymbolAttribute(Sym, Attr);
}
+bool AsmPrinter::shouldEmitLabelForBasicBlock(
+ const MachineBasicBlock &MBB) const {
+ // With `-fbasic-block-sections=`, a label is needed for every non-entry
+ // block in labels mode (`=labels`) and at the beginning of every section
+ // in sections mode (`=all` and `=list=`).
+ if ((MF->hasBBLabels() || MBB.isBeginSection()) && !MBB.isEntryBlock())
+ return true;
+ // A label is needed for any block with at least one predecessor (when that
+ // predecessor is not the fallthrough predecessor, or if it is an EH funclet
+ // entry, or if a label is forced).
+ return !MBB.pred_empty() &&
+ (!isBlockOnlyReachableByFallthrough(&MBB) || MBB.isEHFuncletEntry() ||
+ MBB.hasLabelMustBeEmitted());
+}
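// Aside: the predicate above condensed into a standalone truth function
// (a sketch with hypothetical boolean parameters, for illustration only):
bool needsLabel(bool HasBBLabels, bool BeginsSection, bool IsEntry,
                bool HasPreds, bool OnlyFallthrough, bool IsEHFunclet,
                bool LabelForced) {
  // Labels/sections modes force a label on every non-entry block.
  if ((HasBBLabels || BeginsSection) && !IsEntry)
    return true;
  // Otherwise a label is needed only when some predecessor cannot simply
  // fall through, or when a label is explicitly required.
  return HasPreds && (!OnlyFallthrough || IsEHFunclet || LabelForced);
}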
+
/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
/// exactly one predecessor and the control transfer mechanism between
/// the predecessor and this block is a fall-through.
bool AsmPrinter::
isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
- // With BasicBlock Sections, beginning of the section is not a fallthrough.
- if (MBB->isBeginSection())
- return false;
-
// If this is a landing pad, it isn't a fall through. If it has no preds,
// then nothing falls through to it.
if (MBB->isEHPad() || MBB->pred_empty())
@@ -3232,14 +3368,10 @@ void AsmPrinter::emitXRayTable() {
MCSection *InstMap = nullptr;
MCSection *FnSledIndex = nullptr;
const Triple &TT = TM.getTargetTriple();
- // Use PC-relative addresses on all targets except MIPS (MIPS64 cannot use
- // PC-relative addresses because R_MIPS_PC64 does not exist).
- bool PCRel = !TT.isMIPS();
+ // Use PC-relative addresses on all targets.
if (TT.isOSBinFormatELF()) {
auto LinkedToSym = cast<MCSymbolELF>(CurrentFnSym);
auto Flags = ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER;
- if (!PCRel)
- Flags |= ELF::SHF_WRITE;
StringRef GroupName;
if (F.hasComdat()) {
Flags |= ELF::SHF_GROUP;
@@ -3273,25 +3405,20 @@ void AsmPrinter::emitXRayTable() {
OutStreamer->SwitchSection(InstMap);
OutStreamer->emitLabel(SledsStart);
for (const auto &Sled : Sleds) {
- if (PCRel) {
- MCSymbol *Dot = Ctx.createTempSymbol();
- OutStreamer->emitLabel(Dot);
- OutStreamer->emitValueImpl(
- MCBinaryExpr::createSub(MCSymbolRefExpr::create(Sled.Sled, Ctx),
- MCSymbolRefExpr::create(Dot, Ctx), Ctx),
- WordSizeBytes);
- OutStreamer->emitValueImpl(
- MCBinaryExpr::createSub(
- MCSymbolRefExpr::create(CurrentFnBegin, Ctx),
- MCBinaryExpr::createAdd(
- MCSymbolRefExpr::create(Dot, Ctx),
- MCConstantExpr::create(WordSizeBytes, Ctx), Ctx),
- Ctx),
- WordSizeBytes);
- } else {
- OutStreamer->emitSymbolValue(Sled.Sled, WordSizeBytes);
- OutStreamer->emitSymbolValue(CurrentFnSym, WordSizeBytes);
- }
+ MCSymbol *Dot = Ctx.createTempSymbol();
+ OutStreamer->emitLabel(Dot);
+ OutStreamer->emitValueImpl(
+ MCBinaryExpr::createSub(MCSymbolRefExpr::create(Sled.Sled, Ctx),
+ MCSymbolRefExpr::create(Dot, Ctx), Ctx),
+ WordSizeBytes);
+ OutStreamer->emitValueImpl(
+ MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(CurrentFnBegin, Ctx),
+ MCBinaryExpr::createAdd(MCSymbolRefExpr::create(Dot, Ctx),
+ MCConstantExpr::create(WordSizeBytes, Ctx),
+ Ctx),
+ Ctx),
+ WordSizeBytes);
Sled.emit(WordSizeBytes, OutStreamer.get());
}
MCSymbol *SledsEnd = OutContext.createTempSymbol("xray_sleds_end", true);
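// Aside: each field emitted in the loop above stores Target - FieldAddress,
// so a consumer of the XRay table recovers absolute addresses with plain
// arithmetic. A sketch (the function name is illustrative, not XRay ABI):
#include <cstdint>

uint64_t absoluteFromPCRel(uint64_t FieldAddr, int64_t StoredDelta) {
  return FieldAddr + uint64_t(StoredDelta); // Stored = Target - FieldAddr
}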
@@ -3366,3 +3493,17 @@ uint16_t AsmPrinter::getDwarfVersion() const {
void AsmPrinter::setDwarfVersion(uint16_t Version) {
OutStreamer->getContext().setDwarfVersion(Version);
}
+
+bool AsmPrinter::isDwarf64() const {
+ return OutStreamer->getContext().getDwarfFormat() == dwarf::DWARF64;
+}
+
+unsigned int AsmPrinter::getDwarfOffsetByteSize() const {
+ return dwarf::getDwarfOffsetByteSize(
+ OutStreamer->getContext().getDwarfFormat());
+}
+
+unsigned int AsmPrinter::getUnitLengthFieldByteSize() const {
+ return dwarf::getUnitLengthFieldByteSize(
+ OutStreamer->getContext().getDwarfFormat());
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index b6a9a9568360..c6e43445e7d0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -27,6 +27,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
+#include <cstdint>
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
@@ -97,6 +98,12 @@ static const char *DecodeDWARFEncoding(unsigned Encoding) {
case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8:
  return "indirect pcrel sdata8";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_datarel |
+ dwarf::DW_EH_PE_sdata4:
+ return "indirect datarel sdata4";
+ case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_datarel |
+ dwarf::DW_EH_PE_sdata8:
+ return "indirect datarel sdata8";
}
return "<unknown encoding>";
@@ -137,8 +144,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
}
}
-void AsmPrinter::emitTTypeReference(const GlobalValue *GV,
- unsigned Encoding) const {
+void AsmPrinter::emitTTypeReference(const GlobalValue *GV, unsigned Encoding) {
if (GV) {
const TargetLoweringObjectFile &TLOF = getObjFileLowering();
@@ -154,19 +160,22 @@ void AsmPrinter::emitDwarfSymbolReference(const MCSymbol *Label,
if (!ForceOffset) {
// On COFF targets, we have to emit the special .secrel32 directive.
if (MAI->needsDwarfSectionOffsetDirective()) {
+ assert(!isDwarf64() &&
+ "emitting DWARF64 is not implemented for COFF targets");
OutStreamer->EmitCOFFSecRel32(Label, /*Offset=*/0);
return;
}
// If the format uses relocations with dwarf, refer to the symbol directly.
if (MAI->doesDwarfUseRelocationsAcrossSections()) {
- OutStreamer->emitSymbolValue(Label, 4);
+ OutStreamer->emitSymbolValue(Label, getDwarfOffsetByteSize());
return;
}
}
// Otherwise, emit it as a label difference from the start of the section.
- emitLabelDifference(Label, Label->getSection().getBeginSymbol(), 4);
+ emitLabelDifference(Label, Label->getSection().getBeginSymbol(),
+ getDwarfOffsetByteSize());
}
void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntry S) const {
@@ -177,12 +186,38 @@ void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntry S) const {
}
// Just emit the offset directly; no need for symbol math.
- emitInt32(S.Offset);
+ OutStreamer->emitIntValue(S.Offset, getDwarfOffsetByteSize());
}
void AsmPrinter::emitDwarfOffset(const MCSymbol *Label, uint64_t Offset) const {
- // TODO: Support DWARF64
- emitLabelPlusOffset(Label, Offset, 4);
+ emitLabelPlusOffset(Label, Offset, getDwarfOffsetByteSize());
+}
+
+void AsmPrinter::emitDwarfLengthOrOffset(uint64_t Value) const {
+ assert(isDwarf64() || Value <= UINT32_MAX);
+ OutStreamer->emitIntValue(Value, getDwarfOffsetByteSize());
+}
+
+void AsmPrinter::maybeEmitDwarf64Mark() const {
+ if (!isDwarf64())
+ return;
+ OutStreamer->AddComment("DWARF64 Mark");
+ OutStreamer->emitInt32(dwarf::DW_LENGTH_DWARF64);
+}
+
+void AsmPrinter::emitDwarfUnitLength(uint64_t Length,
+ const Twine &Comment) const {
+ assert(isDwarf64() || Length <= dwarf::DW_LENGTH_lo_reserved);
+ maybeEmitDwarf64Mark();
+ OutStreamer->AddComment(Comment);
+ OutStreamer->emitIntValue(Length, getDwarfOffsetByteSize());
+}
+
+void AsmPrinter::emitDwarfUnitLength(const MCSymbol *Hi, const MCSymbol *Lo,
+ const Twine &Comment) const {
+ maybeEmitDwarf64Mark();
+ OutStreamer->AddComment(Comment);
+ OutStreamer->emitAbsoluteSymbolDiff(Hi, Lo, getDwarfOffsetByteSize());
}
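// Aside: the initial-length encoding these helpers implement, as a
// standalone byte-level sketch. DWARF32 stores a 4-byte length; DWARF64
// first stores the 4-byte escape 0xffffffff (DW_LENGTH_DWARF64) and then
// an 8-byte length (little-endian here for brevity; illustrative only).
#include <cstdint>
#include <vector>

void appendUnitLength(std::vector<uint8_t> &Out, uint64_t Length,
                      bool Dwarf64) {
  auto emit = [&Out](uint64_t V, unsigned Bytes) {
    for (unsigned I = 0; I != Bytes; ++I)
      Out.push_back(uint8_t(V >> (8 * I)));
  };
  if (Dwarf64) {
    emit(0xffffffffu, 4); // DWARF64 mark
    emit(Length, 8);
  } else {
    emit(Length, 4); // must stay below DW_LENGTH_lo_reserved (0xfffffff0)
  }
}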
void AsmPrinter::emitCallSiteOffset(const MCSymbol *Hi, const MCSymbol *Lo,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 538107cecd8b..4a67b0bc2c4d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -146,6 +147,7 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
// we only need MCInstrInfo for asm parsing. We create one unconditionally
// because it's not subtarget dependent.
std::unique_ptr<MCInstrInfo> MII(TM.getTarget().createMCInstrInfo());
+ assert(MII && "Failed to create instruction info");
std::unique_ptr<MCTargetAsmParser> TAP(TM.getTarget().createMCAsmParser(
STI, *Parser, *MII, MCOptions));
if (!TAP)
@@ -232,7 +234,8 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
const char *IDStart = LastEmitted;
const char *IDEnd = IDStart;
- while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd;
+ while (isDigit(*IDEnd))
+ ++IDEnd;
unsigned Val;
if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val))
@@ -397,7 +400,8 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
const char *IDStart = LastEmitted;
const char *IDEnd = IDStart;
- while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd;
+ while (isDigit(*IDEnd))
+ ++IDEnd;
unsigned Val;
if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val))
@@ -547,22 +551,23 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
EmitMSInlineAsmStr(AsmStr, MI, MMI, AP, LocCookie, OS);
// Emit warnings if we use reserved registers on the clobber list, as
- // that might give surprising results.
- std::vector<std::string> RestrRegs;
+ // that might lead to undefined behaviour.
+ SmallVector<Register, 8> RestrRegs;
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
// Start with the first operand descriptor, and iterate over them.
for (unsigned I = InlineAsm::MIOp_FirstOperand, NumOps = MI->getNumOperands();
I < NumOps; ++I) {
const MachineOperand &MO = MI->getOperand(I);
- if (MO.isImm()) {
- unsigned Flags = MO.getImm();
- const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
- if (InlineAsm::getKind(Flags) == InlineAsm::Kind_Clobber &&
- !TRI->isAsmClobberable(*MF, MI->getOperand(I + 1).getReg())) {
- RestrRegs.push_back(TRI->getName(MI->getOperand(I + 1).getReg()));
- }
- // Skip to one before the next operand descriptor, if it exists.
- I += InlineAsm::getNumOperandRegisters(Flags);
+ if (!MO.isImm())
+ continue;
+ unsigned Flags = MO.getImm();
+ if (InlineAsm::getKind(Flags) == InlineAsm::Kind_Clobber) {
+ Register Reg = MI->getOperand(I + 1).getReg();
+ if (!TRI->isAsmClobberable(*MF, Reg))
+ RestrRegs.push_back(Reg);
}
+ // Skip to one before the next operand descriptor, if it exists.
+ I += InlineAsm::getNumOperandRegisters(Flags);
}
if (!RestrRegs.empty()) {
@@ -572,14 +577,15 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
SrcMgr.getMemoryBuffer(BufNum)->getBuffer().begin());
std::string Msg = "inline asm clobber list contains reserved registers: ";
- for (auto I = RestrRegs.begin(), E = RestrRegs.end(); I != E; I++) {
+ for (auto I = RestrRegs.begin(), E = RestrRegs.end(); I != E; ++I) {
if(I != RestrRegs.begin())
Msg += ", ";
- Msg += *I;
+ Msg += TRI->getName(*I);
}
- std::string Note = "Reserved registers on the clobber list may not be "
- "preserved across the asm statement, and clobbering them may "
- "lead to undefined behaviour.";
+ const char *Note =
+ "Reserved registers on the clobber list may not be "
+ "preserved across the asm statement, and clobbering them may "
+ "lead to undefined behaviour.";
SrcMgr.PrintMessage(Loc, SourceMgr::DK_Warning, Msg);
SrcMgr.PrintMessage(Loc, SourceMgr::DK_Note, Note);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
index 90929a217368..5e7db1f2f76c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h
@@ -29,7 +29,7 @@ class ByteStreamer {
public:
// For now we're just handling the calls we need for dwarf emission/hashing.
- virtual void EmitInt8(uint8_t Byte, const Twine &Comment = "") = 0;
+ virtual void emitInt8(uint8_t Byte, const Twine &Comment = "") = 0;
virtual void emitSLEB128(uint64_t DWord, const Twine &Comment = "") = 0;
virtual void emitULEB128(uint64_t DWord, const Twine &Comment = "",
unsigned PadTo = 0) = 0;
@@ -41,7 +41,7 @@ private:
public:
APByteStreamer(AsmPrinter &Asm) : AP(Asm) {}
- void EmitInt8(uint8_t Byte, const Twine &Comment) override {
+ void emitInt8(uint8_t Byte, const Twine &Comment) override {
AP.OutStreamer->AddComment(Comment);
AP.emitInt8(Byte);
}
@@ -61,7 +61,7 @@ class HashingByteStreamer final : public ByteStreamer {
DIEHash &Hash;
public:
HashingByteStreamer(DIEHash &H) : Hash(H) {}
- void EmitInt8(uint8_t Byte, const Twine &Comment) override {
+ void emitInt8(uint8_t Byte, const Twine &Comment) override {
Hash.update(Byte);
}
void emitSLEB128(uint64_t DWord, const Twine &Comment) override {
@@ -88,7 +88,7 @@ public:
std::vector<std::string> &Comments, bool GenerateComments)
: Buffer(Buffer), Comments(Comments), GenerateComments(GenerateComments) {
}
- void EmitInt8(uint8_t Byte, const Twine &Comment) override {
+ void emitInt8(uint8_t Byte, const Twine &Comment) override {
Buffer.push_back(Byte);
if (GenerateComments)
Comments.push_back(Comment.str());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 39069e24e061..b15e750aaf85 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -13,15 +13,10 @@
#include "CodeViewDebug.h"
#include "DwarfExpression.h"
#include "llvm/ADT/APSInt.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/ADT/Triple.h"
@@ -40,7 +35,6 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
-#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/CodeViewRecordIO.h"
#include "llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h"
#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
@@ -48,14 +42,12 @@
#include "llvm/DebugInfo/CodeView/Line.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h"
-#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/CodeView/TypeTableCollection.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
@@ -71,7 +63,6 @@
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
@@ -85,12 +76,8 @@
#include <cassert>
#include <cctype>
#include <cstddef>
-#include <cstdint>
#include <iterator>
#include <limits>
-#include <string>
-#include <utility>
-#include <vector>
using namespace llvm;
using namespace llvm::codeview;
@@ -139,7 +126,9 @@ static CPUType mapArchToCVCPUType(Triple::ArchType Type) {
case Triple::ArchType::x86_64:
return CPUType::X64;
case Triple::ArchType::thumb:
- return CPUType::Thumb;
+ // LLVM currently doesn't support Windows CE, so thumb is mapped
+ // unconditionally to ARMNT here.
+ return CPUType::ARMNT;
case Triple::ArchType::aarch64:
return CPUType::ARM64;
default:
@@ -148,28 +137,7 @@ static CPUType mapArchToCVCPUType(Triple::ArchType Type) {
}
CodeViewDebug::CodeViewDebug(AsmPrinter *AP)
- : DebugHandlerBase(AP), OS(*Asm->OutStreamer), TypeTable(Allocator) {
- // If module doesn't have named metadata anchors or COFF debug section
- // is not available, skip any debug info related stuff.
- if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") ||
- !AP->getObjFileLowering().getCOFFDebugSymbolsSection()) {
- Asm = nullptr;
- MMI->setDebugInfoAvailability(false);
- return;
- }
- // Tell MMI that we have debug info.
- MMI->setDebugInfoAvailability(true);
-
- TheCPU =
- mapArchToCVCPUType(Triple(MMI->getModule()->getTargetTriple()).getArch());
-
- collectGlobalVariableInfo();
-
- // Check if we should emit type record hashes.
- ConstantInt *GH = mdconst::extract_or_null<ConstantInt>(
- MMI->getModule()->getModuleFlag("CodeViewGHash"));
- EmitDebugGlobalHashes = GH && !GH->isZero();
-}
+ : DebugHandlerBase(AP), OS(*Asm->OutStreamer), TypeTable(Allocator) {}
StringRef CodeViewDebug::getFullFilepath(const DIFile *File) {
std::string &Filepath = FileToFilepathMap[File];
@@ -507,8 +475,7 @@ void CodeViewDebug::recordLocalVariable(LocalVariable &&Var,
static void addLocIfNotPresent(SmallVectorImpl<const DILocation *> &Locs,
const DILocation *Loc) {
- auto B = Locs.begin(), E = Locs.end();
- if (std::find(B, E, Loc) == E)
+ if (!llvm::is_contained(Locs, Loc))
Locs.push_back(Loc);
}
@@ -574,12 +541,31 @@ void CodeViewDebug::emitCodeViewMagicVersion() {
OS.emitInt32(COFF::DEBUG_SECTION_MAGIC);
}
+void CodeViewDebug::beginModule(Module *M) {
+ // If the module doesn't have named metadata anchors or the COFF debug
+ // section is not available, skip any debug-info-related work.
+ if (!M->getNamedMetadata("llvm.dbg.cu") ||
+ !Asm->getObjFileLowering().getCOFFDebugSymbolsSection()) {
+ Asm = nullptr;
+ return;
+ }
+ // Tell MMI that we have and need debug info.
+ MMI->setDebugInfoAvailability(true);
+
+ TheCPU = mapArchToCVCPUType(Triple(M->getTargetTriple()).getArch());
+
+ collectGlobalVariableInfo();
+
+ // Check if we should emit type record hashes.
+ ConstantInt *GH =
+ mdconst::extract_or_null<ConstantInt>(M->getModuleFlag("CodeViewGHash"));
+ EmitDebugGlobalHashes = GH && !GH->isZero();
+}
+
void CodeViewDebug::endModule() {
if (!Asm || !MMI->hasDebugInfo())
return;
- assert(Asm != nullptr);
-
// The COFF .debug$S section consists of several subsections, each starting
// with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length
// of the payload followed by the payload itself. The subsections are 4-byte
@@ -600,13 +586,18 @@ void CodeViewDebug::endModule() {
if (!P.first->isDeclarationForLinker())
emitDebugInfoForFunction(P.first, *P.second);
- // Emit global variable debug information.
- setCurrentSubprogram(nullptr);
- emitDebugInfoForGlobals();
+ // Get types used by globals without emitting anything.
+ // This is meant to collect all static const data members so they can be
+ // emitted as globals.
+ collectDebugInfoForGlobals();
// Emit retained types.
emitDebugInfoForRetainedTypes();
+ // Emit global variable debug information.
+ setCurrentSubprogram(nullptr);
+ emitDebugInfoForGlobals();
+
// Switch back to the generic .debug$S section after potentially processing
// comdat symbol sections.
switchToDebugSectionForSymbol(nullptr);
@@ -1195,12 +1186,15 @@ void CodeViewDebug::collectVariableInfoFromMFTable(
// Get the frame register used and the offset.
Register FrameReg;
- int FrameOffset = TFI->getFrameIndexReference(*Asm->MF, VI.Slot, FrameReg);
+ StackOffset FrameOffset = TFI->getFrameIndexReference(*Asm->MF, VI.Slot, FrameReg);
uint16_t CVReg = TRI->getCodeViewRegNum(FrameReg);
+ assert(!FrameOffset.getScalable() &&
+ "Frame offsets with a scalable component are not supported");
+
// Calculate the label ranges.
LocalVarDefRange DefRange =
- createDefRangeMem(CVReg, FrameOffset + ExprOffset);
+ createDefRangeMem(CVReg, FrameOffset.getFixed() + ExprOffset);
for (const InsnRange &Range : Scope->getRanges()) {
const MCSymbol *Begin = getLabelBeforeInsn(Range.first);
@@ -2155,6 +2149,15 @@ void CodeViewDebug::collectMemberInfo(ClassInfo &Info,
const DIDerivedType *DDTy) {
if (!DDTy->getName().empty()) {
Info.Members.push_back({DDTy, 0});
+
+ // Collect static const data members with values.
+ if ((DDTy->getFlags() & DINode::FlagStaticMember) ==
+ DINode::FlagStaticMember) {
+ if (DDTy->getConstant() && (isa<ConstantInt>(DDTy->getConstant()) ||
+ isa<ConstantFP>(DDTy->getConstant())))
+ StaticConstMembers.push_back(DDTy);
+ }
+
return;
}
@@ -3057,15 +3060,32 @@ void CodeViewDebug::collectGlobalVariableInfo() {
}
}
+void CodeViewDebug::collectDebugInfoForGlobals() {
+ for (const CVGlobalVariable &CVGV : GlobalVariables) {
+ const DIGlobalVariable *DIGV = CVGV.DIGV;
+ const DIScope *Scope = DIGV->getScope();
+ getCompleteTypeIndex(DIGV->getType());
+ getFullyQualifiedName(Scope, DIGV->getName());
+ }
+
+ for (const CVGlobalVariable &CVGV : ComdatVariables) {
+ const DIGlobalVariable *DIGV = CVGV.DIGV;
+ const DIScope *Scope = DIGV->getScope();
+ getCompleteTypeIndex(DIGV->getType());
+ getFullyQualifiedName(Scope, DIGV->getName());
+ }
+}
+
void CodeViewDebug::emitDebugInfoForGlobals() {
// First, emit all globals that are not in a comdat in a single symbol
// substream. MSVC doesn't like it if the substream is empty, so only open
// it if we have at least one global to emit.
switchToDebugSectionForSymbol(nullptr);
- if (!GlobalVariables.empty()) {
+ if (!GlobalVariables.empty() || !StaticConstMembers.empty()) {
OS.AddComment("Symbol subsection for globals");
MCSymbol *EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols);
emitGlobalVariableList(GlobalVariables);
+ emitStaticConstMemberList();
endCVSubsection(EndLabel);
}
@@ -3104,6 +3124,61 @@ void CodeViewDebug::emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals) {
}
}
+void CodeViewDebug::emitStaticConstMemberList() {
+ for (const DIDerivedType *DTy : StaticConstMembers) {
+ const DIScope *Scope = DTy->getScope();
+
+ APSInt Value;
+ if (const ConstantInt *CI =
+ dyn_cast_or_null<ConstantInt>(DTy->getConstant()))
+ Value = APSInt(CI->getValue(),
+ DebugHandlerBase::isUnsignedDIType(DTy->getBaseType()));
+ else if (const ConstantFP *CFP =
+ dyn_cast_or_null<ConstantFP>(DTy->getConstant()))
+ Value = APSInt(CFP->getValueAPF().bitcastToAPInt(), true);
+ else
+ llvm_unreachable("cannot emit a constant without a value");
+
+ std::string QualifiedName = getFullyQualifiedName(Scope, DTy->getName());
+
+ MCSymbol *SConstantEnd = beginSymbolRecord(SymbolKind::S_CONSTANT);
+ OS.AddComment("Type");
+ OS.emitInt32(getTypeIndex(DTy->getBaseType()).getIndex());
+ OS.AddComment("Value");
+
+ // Encoded integers shouldn't need more than 10 bytes.
+ uint8_t Data[10];
+ BinaryStreamWriter Writer(Data, llvm::support::endianness::little);
+ CodeViewRecordIO IO(Writer);
+ cantFail(IO.mapEncodedInteger(Value));
+ StringRef SRef((char *)Data, Writer.getOffset());
+ OS.emitBinaryData(SRef);
+
+ OS.AddComment("Name");
+ emitNullTerminatedSymbolName(OS, QualifiedName);
+ endSymbolRecord(SConstantEnd);
+ }
+}
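// Aside: the 10-byte bound above follows from CodeView's numeric-leaf
// encoding: a two-byte leaf kind plus at most an eight-byte payload.
// A simplified little-endian sketch; the leaf-kind constant below is an
// assumption for illustration, not copied from LLVM's CodeView headers.
#include <cstdint>
#include <vector>

void writeNumericLeaf(std::vector<uint8_t> &Out, uint64_t V) {
  auto put16 = [&Out](uint16_t X) {
    Out.push_back(uint8_t(X));
    Out.push_back(uint8_t(X >> 8));
  };
  if (V < 0x8000) { // small values are stored immediately in two bytes
    put16(uint16_t(V));
    return;
  }
  put16(0x800a); // hypothetical 64-bit unsigned leaf kind
  for (unsigned I = 0; I != 8; ++I)
    Out.push_back(uint8_t(V >> (8 * I)));
}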
+
+static bool isFloatDIType(const DIType *Ty) {
+ if (isa<DICompositeType>(Ty))
+ return false;
+
+ if (auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
+ dwarf::Tag T = (dwarf::Tag)Ty->getTag();
+ if (T == dwarf::DW_TAG_pointer_type ||
+ T == dwarf::DW_TAG_ptr_to_member_type ||
+ T == dwarf::DW_TAG_reference_type ||
+ T == dwarf::DW_TAG_rvalue_reference_type)
+ return false;
+ assert(DTy->getBaseType() && "Expected valid base type");
+ return isFloatDIType(DTy->getBaseType());
+ }
+
+ auto *BTy = cast<DIBasicType>(Ty);
+ return (BTy->getEncoding() == dwarf::DW_ATE_float);
+}
+
void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
const DIGlobalVariable *DIGV = CVGV.DIGV;
@@ -3139,7 +3214,12 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
const DIExpression *DIE = CVGV.GVInfo.get<const DIExpression *>();
assert(DIE->isConstant() &&
"Global constant variables must contain a constant expression.");
- uint64_t Val = DIE->getElement(1);
+
+ // Use unsigned for floats.
+ bool isUnsigned = isFloatDIType(DIGV->getType())
+ ? true
+ : DebugHandlerBase::isUnsignedDIType(DIGV->getType());
+ APSInt Value(APInt(/*BitWidth=*/64, DIE->getElement(1)), isUnsigned);
MCSymbol *SConstantEnd = beginSymbolRecord(SymbolKind::S_CONSTANT);
OS.AddComment("Type");
@@ -3150,7 +3230,7 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
uint8_t data[10];
BinaryStreamWriter Writer(data, llvm::support::endianness::little);
CodeViewRecordIO IO(Writer);
- cantFail(IO.mapEncodedInteger(Val));
+ cantFail(IO.mapEncodedInteger(Value));
StringRef SRef((char *)data, Writer.getOffset());
OS.emitBinaryData(SRef);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index 82f0293874d0..9eee5492bc81 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -203,6 +203,9 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
// Array of non-COMDAT global variables.
SmallVector<CVGlobalVariable, 1> GlobalVariables;
+ /// List of static const data members to be emitted as S_CONSTANTs.
+ SmallVector<const DIDerivedType *, 4> StaticConstMembers;
+
/// The set of comdat .debug$S sections that we've seen so far. Each section
/// must start with a magic version number that must only be emitted once.
/// This set tracks which sections we've already opened.
@@ -227,10 +230,6 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
void calculateRanges(LocalVariable &Var,
const DbgValueHistoryMap::Entries &Entries);
- static void collectInlineSiteChildren(SmallVectorImpl<unsigned> &Children,
- const FunctionInfo &FI,
- const InlineSite &Site);
-
/// Remember some debug info about each function. Keep it in a stable order to
/// emit at the end of the TU.
MapVector<const Function *, std::unique_ptr<FunctionInfo>> FnDebugInfo;
@@ -313,9 +312,11 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
void emitDebugInfoForUDTs(
const std::vector<std::pair<std::string, const DIType *>> &UDTs);
+ void collectDebugInfoForGlobals();
void emitDebugInfoForGlobals();
void emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals);
void emitDebugInfoForGlobal(const CVGlobalVariable &CVGV);
+ void emitStaticConstMemberList();
/// Opens a subsection of the given kind in a .debug$S codeview section.
/// Returns an end label for use with endCVSubsection when the subsection is
@@ -464,6 +465,8 @@ protected:
public:
CodeViewDebug(AsmPrinter *AP);
+ void beginModule(Module *M) override;
+
void setSymbolSize(const MCSymbol *, uint64_t) override {}
/// Emit the COFF section that holds the line table information.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index edf82fbed650..39b0b027c765 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -194,7 +194,7 @@ DIEAbbrev DIE::generateAbbrev() const {
return Abbrev;
}
-unsigned DIE::getDebugSectionOffset() const {
+uint64_t DIE::getDebugSectionOffset() const {
const DIEUnit *Unit = getUnit();
assert(Unit && "DIE must be owned by a DIEUnit to get its absolute offset");
return Unit->getDebugSectionOffset() + getOffset();
@@ -313,10 +313,8 @@ unsigned DIE::computeOffsetsAndAbbrevs(const AsmPrinter *AP,
//===----------------------------------------------------------------------===//
// DIEUnit Implementation
//===----------------------------------------------------------------------===//
-DIEUnit::DIEUnit(uint16_t V, uint8_t A, dwarf::Tag UnitTag)
- : Die(UnitTag), Section(nullptr), Offset(0), Length(0), Version(V),
- AddrSize(A)
-{
+DIEUnit::DIEUnit(dwarf::Tag UnitTag)
+ : Die(UnitTag), Section(nullptr), Offset(0) {
Die.Owner = this;
assert((UnitTag == dwarf::DW_TAG_compile_unit ||
UnitTag == dwarf::DW_TAG_skeleton_unit ||
@@ -430,10 +428,10 @@ void DIEInteger::emitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
/// SizeOf - Determine size of integer value in bytes.
///
unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
- dwarf::FormParams Params = {0, 0, dwarf::DWARF32};
- if (AP)
- Params = {AP->getDwarfVersion(), uint8_t(AP->getPointerSize()),
- AP->OutStreamer->getContext().getDwarfFormat()};
+ assert(AP && "AsmPrinter is required to set FormParams");
+ dwarf::FormParams Params = {AP->getDwarfVersion(),
+ uint8_t(AP->getPointerSize()),
+ AP->OutStreamer->getContext().getDwarfFormat()};
if (Optional<uint8_t> FixedSize = dwarf::getFixedFormByteSize(Form, Params))
return *FixedSize;
@@ -472,10 +470,16 @@ void DIEExpr::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
/// SizeOf - Determine size of expression value in bytes.
///
unsigned DIEExpr::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
- if (Form == dwarf::DW_FORM_data4) return 4;
- if (Form == dwarf::DW_FORM_sec_offset) return 4;
- if (Form == dwarf::DW_FORM_strp) return 4;
- return AP->getPointerSize();
+ switch (Form) {
+ case dwarf::DW_FORM_data4:
+ return 4;
+ case dwarf::DW_FORM_data8:
+ return 8;
+ case dwarf::DW_FORM_sec_offset:
+ return AP->getDwarfOffsetByteSize();
+ default:
+ llvm_unreachable("DIE Value form not supported yet");
+ }
}
LLVM_DUMP_METHOD
@@ -488,19 +492,26 @@ void DIEExpr::print(raw_ostream &O) const { O << "Expr: " << *Expr; }
/// EmitValue - Emit label value.
///
void DIELabel::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
- AP->emitLabelReference(
- Label, SizeOf(AP, Form),
- Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_sec_offset ||
- Form == dwarf::DW_FORM_ref_addr || Form == dwarf::DW_FORM_data4);
+ bool IsSectionRelative = Form != dwarf::DW_FORM_addr;
+ AP->emitLabelReference(Label, SizeOf(AP, Form), IsSectionRelative);
}
/// SizeOf - Determine size of label value in bytes.
///
unsigned DIELabel::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
- if (Form == dwarf::DW_FORM_data4) return 4;
- if (Form == dwarf::DW_FORM_sec_offset) return 4;
- if (Form == dwarf::DW_FORM_strp) return 4;
- return AP->MAI->getCodePointerSize();
+ switch (Form) {
+ case dwarf::DW_FORM_data4:
+ return 4;
+ case dwarf::DW_FORM_data8:
+ return 8;
+ case dwarf::DW_FORM_sec_offset:
+ case dwarf::DW_FORM_strp:
+ return AP->getDwarfOffsetByteSize();
+ case dwarf::DW_FORM_addr:
+ return AP->MAI->getCodePointerSize();
+ default:
+ llvm_unreachable("DIE Value form not supported yet");
+ }
}
LLVM_DUMP_METHOD
@@ -536,10 +547,16 @@ void DIEDelta::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
/// SizeOf - Determine size of delta value in bytes.
///
unsigned DIEDelta::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
- if (Form == dwarf::DW_FORM_data4) return 4;
- if (Form == dwarf::DW_FORM_sec_offset) return 4;
- if (Form == dwarf::DW_FORM_strp) return 4;
- return AP->MAI->getCodePointerSize();
+ switch (Form) {
+ case dwarf::DW_FORM_data4:
+ return 4;
+ case dwarf::DW_FORM_data8:
+ return 8;
+ case dwarf::DW_FORM_sec_offset:
+ return AP->getDwarfOffsetByteSize();
+ default:
+ llvm_unreachable("DIE Value form not supported yet");
+ }
}
LLVM_DUMP_METHOD
@@ -645,7 +662,7 @@ void DIEEntry::emitValue(const AsmPrinter *AP, dwarf::Form Form) const {
case dwarf::DW_FORM_ref_addr: {
// Get the absolute offset for this DIE within the debug info/types section.
- unsigned Addr = Entry->getDebugSectionOffset();
+ uint64_t Addr = Entry->getDebugSectionOffset();
if (const MCSymbol *SectionSym =
Entry->getUnit()->getCrossSectionRelativeBaseAddress()) {
AP->emitLabelPlusOffset(SectionSym, Addr, SizeOf(AP, Form), true);
@@ -802,13 +819,24 @@ void DIEBlock::print(raw_ostream &O) const {
//===----------------------------------------------------------------------===//
unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
- if (Form == dwarf::DW_FORM_loclistx)
+ switch (Form) {
+ case dwarf::DW_FORM_loclistx:
return getULEB128Size(Index);
- if (Form == dwarf::DW_FORM_data4)
- return 4;
- if (Form == dwarf::DW_FORM_sec_offset)
+ case dwarf::DW_FORM_data4:
+ assert(!AP->isDwarf64() &&
+ "DW_FORM_data4 is not suitable to emit a pointer to a location list "
+ "in the 64-bit DWARF format");
return 4;
- return AP->MAI->getCodePointerSize();
+ case dwarf::DW_FORM_data8:
+ assert(AP->isDwarf64() &&
+ "DW_FORM_data8 is not suitable to emit a pointer to a location list "
+ "in the 32-bit DWARF format");
+ return 8;
+ case dwarf::DW_FORM_sec_offset:
+ return AP->getDwarfOffsetByteSize();
+ default:
+ llvm_unreachable("DIE Value form not supported yet");
+ }
}
/// EmitValue - Emit label value.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
index f26ef63eedec..da9997efc01f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -12,6 +12,7 @@
#include "DIEHash.h"
#include "ByteStreamer.h"
+#include "DwarfCompileUnit.h"
#include "DwarfDebug.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
@@ -214,7 +215,15 @@ void DIEHash::hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag,
// all of the data is going to be added as integers.
void DIEHash::hashBlockData(const DIE::const_value_range &Values) {
for (const auto &V : Values)
- Hash.update((uint64_t)V.getDIEInteger().getValue());
+ if (V.getType() == DIEValue::isBaseTypeRef) {
+ const DIE &C =
+ *CU->ExprRefedBaseTypes[V.getDIEBaseTypeRef().getIndex()].Die;
+ StringRef Name = getDIEStringAttr(C, dwarf::DW_AT_name);
+ assert(!Name.empty() &&
+ "Base types referenced from DW_OP_convert should have a name");
+ hashNestedType(C, Name);
+ } else
+ Hash.update((uint64_t)V.getDIEInteger().getValue());
}
// Hash the contents of a loclistptr class.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
index 1a69f6772873..29e1da4c5d60 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
@@ -31,7 +31,8 @@ class DIEHash {
};
public:
- DIEHash(AsmPrinter *A = nullptr) : AP(A) {}
+ DIEHash(AsmPrinter *A = nullptr, DwarfCompileUnit *CU = nullptr)
+ : AP(A), CU(CU) {}
/// Computes the CU signature.
uint64_t computeCUSignature(StringRef DWOName, const DIE &Die);
@@ -101,6 +102,7 @@ private:
private:
MD5 Hash;
AsmPrinter *AP;
+ DwarfCompileUnit *CU;
DenseMap<const DIE *, unsigned> Numbering;
};
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
index 584b7614915d..1c9131edab83 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
@@ -8,9 +8,11 @@
#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -51,6 +53,37 @@ static Register isDescribedByReg(const MachineInstr &MI) {
: Register();
}
+void InstructionOrdering::initialize(const MachineFunction &MF) {
+ // We give meta instructions the same ordinal as the preceding instruction
+ // because this class is written for the task of comparing positions of
+ // variable location ranges against scope ranges. To reflect what we'll see
+ // in the binary, when we look at location ranges we must consider all
+ // DBG_VALUEs between two real instructions at the same position. And a
+ // scope range which ends on a meta instruction should be considered to end
+ // at the last seen real instruction. E.g.
+ //
+ // 1 instruction p Both the variable location for x and for y start
+ // 1 DBG_VALUE for "x" after instruction p so we give them all the same
+ // 1 DBG_VALUE for "y" number. If a scope range ends at DBG_VALUE for "y",
+ // 2 instruction q we should treat it as ending after instruction p
+ // because it will be the last real instruction in the
+ // range. DBG_VALUEs at or after this position for
+ // variables declared in the scope will have no effect.
+ clear();
+ unsigned Position = 0;
+ for (const MachineBasicBlock &MBB : MF)
+ for (const MachineInstr &MI : MBB)
+ InstNumberMap[&MI] = MI.isMetaInstruction() ? Position : ++Position;
+}
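// Aside: the same numbering scheme with plain containers -- meta
// instructions (e.g. DBG_VALUEs) share the ordinal of the preceding real
// instruction. A sketch only; the Inst type here is hypothetical.
#include <map>
#include <vector>

struct Inst { bool IsMeta; };

std::map<const Inst *, unsigned> numberInsts(const std::vector<Inst> &Block) {
  std::map<const Inst *, unsigned> Order;
  unsigned Position = 0;
  for (const Inst &I : Block)
    Order[&I] = I.IsMeta ? Position : ++Position;
  return Order;
}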
+
+bool InstructionOrdering::isBefore(const MachineInstr *A,
+ const MachineInstr *B) const {
+ assert(A->getParent() && B->getParent() && "Operands must have a parent");
+ assert(A->getMF() == B->getMF() &&
+ "Operands must be in the same MachineFunction");
+ return InstNumberMap.lookup(A) < InstNumberMap.lookup(B);
+}
+
bool DbgValueHistoryMap::startDbgValue(InlinedEntity Var,
const MachineInstr &MI,
EntryIndex &NewIndex) {
@@ -90,6 +123,156 @@ void DbgValueHistoryMap::Entry::endEntry(EntryIndex Index) {
EndIndex = Index;
}
+/// Check if the instruction range [StartMI, EndMI] intersects any instruction
+/// range in Ranges. EndMI can be nullptr to indicate that the range is
+/// unbounded. Assumes Ranges is ordered and disjoint. Returns an iterator to
+/// the first intersecting scope range if one exists, or None otherwise.
+static Optional<ArrayRef<InsnRange>::iterator>
+intersects(const MachineInstr *StartMI, const MachineInstr *EndMI,
+ const ArrayRef<InsnRange> &Ranges,
+ const InstructionOrdering &Ordering) {
+ for (auto RangesI = Ranges.begin(), RangesE = Ranges.end();
+ RangesI != RangesE; ++RangesI) {
+ if (EndMI && Ordering.isBefore(EndMI, RangesI->first))
+ return None;
+ if (EndMI && !Ordering.isBefore(RangesI->second, EndMI))
+ return RangesI;
+ if (Ordering.isBefore(StartMI, RangesI->second))
+ return RangesI;
+ }
+ return None;
+}
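// Aside: the same early-exit walk over ordered, disjoint ranges, restated
// with plain positions instead of instruction pointers (illustrative only;
// a null End models an open-ended location range):
#include <optional>
#include <utility>
#include <vector>

using Range = std::pair<unsigned, unsigned>;

std::optional<size_t> firstIntersecting(unsigned Start,
                                        std::optional<unsigned> End,
                                        const std::vector<Range> &Ranges) {
  for (size_t I = 0; I != Ranges.size(); ++I) {
    if (End && *End < Ranges[I].first)
      return std::nullopt; // every later range starts even later
    if (End && !(Ranges[I].second < *End))
      return I; // range does not close before ours ends
    if (Start < Ranges[I].second)
      return I; // our range opens before this one closes
  }
  return std::nullopt;
}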
+
+void DbgValueHistoryMap::trimLocationRanges(
+ const MachineFunction &MF, LexicalScopes &LScopes,
+ const InstructionOrdering &Ordering) {
+ // The indices of the entries we're going to remove for each variable.
+ SmallVector<EntryIndex, 4> ToRemove;
+ // Entry reference count for each variable. Clobbers left with no references
+ // will be removed.
+ SmallVector<int, 4> ReferenceCount;
+ // Entries reference other entries by index. Offsets is used to remap these
+ // references if any entries are removed.
+ SmallVector<size_t, 4> Offsets;
+
+ for (auto &Record : VarEntries) {
+ auto &HistoryMapEntries = Record.second;
+ if (HistoryMapEntries.empty())
+ continue;
+
+ InlinedEntity Entity = Record.first;
+ const DILocalVariable *LocalVar = cast<DILocalVariable>(Entity.first);
+
+ LexicalScope *Scope = nullptr;
+ if (const DILocation *InlinedAt = Entity.second) {
+ Scope = LScopes.findInlinedScope(LocalVar->getScope(), InlinedAt);
+ } else {
+ Scope = LScopes.findLexicalScope(LocalVar->getScope());
+ // Ignore variables for non-inlined function level scopes. The scope
+ // ranges (from scope->getRanges()) will not include any instructions
+ // before the first one with a debug-location, which could cause us to
+ // incorrectly drop a location. We could introduce special casing for
+ // these variables, but it doesn't seem worth it because no out-of-scope
+ // locations have been observed for variables declared in function level
+ // scopes.
+ if (Scope &&
+ (Scope->getScopeNode() == Scope->getScopeNode()->getSubprogram()) &&
+ (Scope->getScopeNode() == LocalVar->getScope()))
+ continue;
+ }
+
+ // If there is no scope for the variable then something has probably gone
+ // wrong.
+ if (!Scope)
+ continue;
+
+ ToRemove.clear();
+ // Zero the reference counts.
+ ReferenceCount.assign(HistoryMapEntries.size(), 0);
+ // Index of the DBG_VALUE which marks the start of the current location
+ // range.
+ EntryIndex StartIndex = 0;
+ ArrayRef<InsnRange> ScopeRanges(Scope->getRanges());
+ for (auto EI = HistoryMapEntries.begin(), EE = HistoryMapEntries.end();
+ EI != EE; ++EI, ++StartIndex) {
+ // Only DBG_VALUEs can open location ranges so skip anything else.
+ if (!EI->isDbgValue())
+ continue;
+
+ // Index of the entry which closes this range.
+ EntryIndex EndIndex = EI->getEndIndex();
+ // If this range is closed bump the reference count of the closing entry.
+ if (EndIndex != NoEntry)
+ ReferenceCount[EndIndex] += 1;
+ // Skip this location range if the opening entry is still referenced. It
+ // may close a location range which intersects a scope range.
+ // TODO: We could be 'smarter' and trim these kinds of ranges such that
+ // they do not leak out of the scope ranges if they partially overlap.
+ if (ReferenceCount[StartIndex] > 0)
+ continue;
+
+ const MachineInstr *StartMI = EI->getInstr();
+ const MachineInstr *EndMI = EndIndex != NoEntry
+ ? HistoryMapEntries[EndIndex].getInstr()
+ : nullptr;
+ // Check if the location range [StartMI, EndMI] intersects with any scope
+ // range for the variable.
+ if (auto R = intersects(StartMI, EndMI, ScopeRanges, Ordering)) {
+ // Adjust ScopeRanges to exclude ranges which subsequent location ranges
+ // cannot possibly intersect.
+ ScopeRanges = ArrayRef<InsnRange>(R.getValue(), ScopeRanges.end());
+ } else {
+ // If the location range does not intersect any scope range then the
+ // DBG_VALUE which opened this location range is useless; mark it for
+ // removal.
+ ToRemove.push_back(StartIndex);
+ // Because we'll be removing this entry we need to update the reference
+ // count of the closing entry, if one exists.
+ if (EndIndex != NoEntry)
+ ReferenceCount[EndIndex] -= 1;
+ }
+ }
+
+ // If there is nothing to remove then jump to next variable.
+ if (ToRemove.empty())
+ continue;
+
+ // Mark clobbers that will no longer close any location ranges for removal.
+ for (size_t i = 0; i < HistoryMapEntries.size(); ++i)
+ if (ReferenceCount[i] <= 0 && HistoryMapEntries[i].isClobber())
+ ToRemove.push_back(i);
+
+ llvm::sort(ToRemove);
+
+ // Build an offset map so we can update the EndIndex of the remaining
+ // entries.
+ // Zero the offsets.
+ Offsets.assign(HistoryMapEntries.size(), 0);
+ size_t CurOffset = 0;
+ auto ToRemoveItr = ToRemove.begin();
+ for (size_t EntryIdx = *ToRemoveItr; EntryIdx < HistoryMapEntries.size();
+ ++EntryIdx) {
+ // Check if this is an entry which will be removed.
+ if (ToRemoveItr != ToRemove.end() && *ToRemoveItr == EntryIdx) {
+ ++ToRemoveItr;
+ ++CurOffset;
+ }
+ Offsets[EntryIdx] = CurOffset;
+ }
+
+ // Update the EndIndex of the entries to account for those which will be
+ // removed.
+ for (auto &Entry : HistoryMapEntries)
+ if (Entry.isClosed())
+ Entry.EndIndex -= Offsets[Entry.EndIndex];
+
+ // Now actually remove the entries. Iterate backwards so that our remaining
+ // ToRemove indices are valid after each erase.
+ for (auto Itr = ToRemove.rbegin(), End = ToRemove.rend(); Itr != End; ++Itr)
+ HistoryMapEntries.erase(HistoryMapEntries.begin() + *Itr);
+ }
+}
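// Aside: a standalone sketch of the EndIndex remapping above -- given the
// sorted ToRemove indices, Offsets[I] is how far surviving index I shifts
// left once the erases happen (illustrative only):
#include <cstddef>
#include <vector>

std::vector<size_t> buildOffsets(size_t N,
                                 const std::vector<size_t> &ToRemove) {
  std::vector<size_t> Offsets(N, 0);
  size_t Cur = 0;
  auto It = ToRemove.begin();
  size_t First = ToRemove.empty() ? N : ToRemove.front();
  for (size_t I = First; I < N; ++I) {
    if (It != ToRemove.end() && *It == I) {
      ++It;  // this entry will be erased
      ++Cur; // everything after it shifts one slot left
    }
    Offsets[I] = Cur;
  }
  return Offsets; // a surviving reference Old becomes Old - Offsets[Old]
}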
+
void DbgLabelInstrMap::addInstr(InlinedEntity Label, const MachineInstr &MI) {
assert(MI.isDebugLabel() && "not a DBG_LABEL");
LabelInstr[Label] = &MI;
@@ -234,7 +417,7 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF,
DbgValueHistoryMap &DbgValues,
DbgLabelInstrMap &DbgLabels) {
const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
- unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
+ Register SP = TLI->getStackPointerRegisterToSaveRestore();
Register FrameReg = TRI->getFrameRegister(*MF);
RegDescribedVarsMap RegVars;
DbgValueEntriesMap LiveEntries;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 880791a06d93..68a4bfba42a7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -21,11 +21,16 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
#define DEBUG_TYPE "dwarfdebug"
+/// If true, we drop variable location ranges which exist entirely outside the
+/// variable's lexical scope instruction ranges.
+static cl::opt<bool> TrimVarLocs("trim-var-locs", cl::Hidden, cl::init(true));
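Since this is a hidden cl::opt, it can be flipped through the usual tool surfaces; the invocations below are a sketch following standard cl::opt plumbing, not output copied from a real session:

$ llc -trim-var-locs=false input.ll             # toggle directly in llc
$ clang -O2 -g -mllvm -trim-var-locs=false a.c  # forwarded through the driver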
+
Optional<DbgVariableLocation>
DbgVariableLocation::extractFromMachineInstruction(
const MachineInstr &Instruction) {
@@ -86,6 +91,11 @@ DbgVariableLocation::extractFromMachineInstruction(
DebugHandlerBase::DebugHandlerBase(AsmPrinter *A) : Asm(A), MMI(Asm->MMI) {}
+void DebugHandlerBase::beginModule(Module *M) {
+ if (M->debug_compile_units().empty())
+ Asm = nullptr;
+}
+
// Each LexicalScope has a first and last instruction that mark the beginning
// and end of the scope, respectively. Create an inverse map from instructions
// to the scopes they start (and end). One instruction may start (or
@@ -153,6 +163,54 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DIType *Ty) {
return getBaseTypeSize(BaseType);
}
+bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) {
+ if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
+ // FIXME: Enums without a fixed underlying type have unknown signedness
+ // here, leading to incorrectly emitted constants.
+ if (CTy->getTag() == dwarf::DW_TAG_enumeration_type)
+ return false;
+
+ // (Pieces of) aggregate types that get hacked apart by SROA may be
+ // represented by a constant. Encode them as unsigned bytes.
+ return true;
+ }
+
+ if (auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
+ dwarf::Tag T = (dwarf::Tag)Ty->getTag();
+ // Encode pointer constants as unsigned bytes. This is used at least for
+ // null pointer constant emission.
+ // FIXME: reference and rvalue_reference /probably/ shouldn't be allowed
+ // here, but accept them for now due to a bug in SROA producing bogus
+ // dbg.values.
+ if (T == dwarf::DW_TAG_pointer_type ||
+ T == dwarf::DW_TAG_ptr_to_member_type ||
+ T == dwarf::DW_TAG_reference_type ||
+ T == dwarf::DW_TAG_rvalue_reference_type)
+ return true;
+ assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type ||
+ T == dwarf::DW_TAG_volatile_type ||
+ T == dwarf::DW_TAG_restrict_type || T == dwarf::DW_TAG_atomic_type);
+ assert(DTy->getBaseType() && "Expected valid base type");
+ return isUnsignedDIType(DTy->getBaseType());
+ }
+
+ auto *BTy = cast<DIBasicType>(Ty);
+ unsigned Encoding = BTy->getEncoding();
+ assert((Encoding == dwarf::DW_ATE_unsigned ||
+ Encoding == dwarf::DW_ATE_unsigned_char ||
+ Encoding == dwarf::DW_ATE_signed ||
+ Encoding == dwarf::DW_ATE_signed_char ||
+ Encoding == dwarf::DW_ATE_float || Encoding == dwarf::DW_ATE_UTF ||
+ Encoding == dwarf::DW_ATE_boolean ||
+ (Ty->getTag() == dwarf::DW_TAG_unspecified_type &&
+ Ty->getName() == "decltype(nullptr)")) &&
+ "Unsupported encoding");
+ return Encoding == dwarf::DW_ATE_unsigned ||
+ Encoding == dwarf::DW_ATE_unsigned_char ||
+ Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean ||
+ Ty->getTag() == dwarf::DW_TAG_unspecified_type;
+}
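Concretely: bool (DW_ATE_boolean) and unsigned char (DW_ATE_unsigned_char) report true; int (DW_ATE_signed) and float (DW_ATE_float) report false; pointer-like derived types are forced to true so null-pointer constants are encoded as unsigned bytes; and, per the FIXME above, every enumeration currently reports signed regardless of its underlying type.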
+
static bool hasDebugInfo(const MachineModuleInfo *MMI,
const MachineFunction *MF) {
if (!MMI->hasDebugInfo())
@@ -191,6 +249,9 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
assert(DbgLabels.empty() && "DbgLabels map wasn't cleaned!");
calculateDbgEntityHistory(MF, Asm->MF->getSubtarget().getRegisterInfo(),
DbgValues, DbgLabels);
+ InstOrdering.initialize(*MF);
+ if (TrimVarLocs)
+ DbgValues.trimLocationRanges(*MF, LScopes, InstOrdering);
LLVM_DEBUG(DbgValues.dump());
// Request labels for the full history.
@@ -212,10 +273,16 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
// doing that violates the ranges that are calculated in the history map.
// However, we currently do not emit debug values for constant arguments
// directly at the start of the function, so this code is still useful.
+ // FIXME: If the first mention of an argument is in a unique section basic
+ // block, we cannot always assign the CurrentFnBeginLabel as it lies in a
+ // different section. Temporarily, we disable generating loc list
+ // information or DW_AT_const_value when the block is in a different
+ // section.
const DILocalVariable *DIVar =
Entries.front().getInstr()->getDebugVariable();
if (DIVar->isParameter() &&
- getDISubprogram(DIVar->getScope())->describes(&MF->getFunction())) {
+ getDISubprogram(DIVar->getScope())->describes(&MF->getFunction()) &&
+ Entries.front().getInstr()->getParent()->sameSection(&MF->front())) {
if (!IsDescribedByReg(Entries.front().getInstr()))
LabelsBeforeInsn[Entries.front().getInstr()] = Asm->getFunctionBegin();
if (Entries.front().getInstr()->getDebugExpression()->isFragment()) {
@@ -262,7 +329,7 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
}
void DebugHandlerBase::beginInstruction(const MachineInstr *MI) {
- if (!MMI->hasDebugInfo())
+ if (!Asm || !MMI->hasDebugInfo())
return;
assert(CurMI == nullptr);
@@ -288,7 +355,7 @@ void DebugHandlerBase::beginInstruction(const MachineInstr *MI) {
}
void DebugHandlerBase::endInstruction() {
- if (!MMI->hasDebugInfo())
+ if (!Asm || !MMI->hasDebugInfo())
return;
assert(CurMI != nullptr);
@@ -320,12 +387,13 @@ void DebugHandlerBase::endInstruction() {
}
void DebugHandlerBase::endFunction(const MachineFunction *MF) {
- if (hasDebugInfo(MMI, MF))
+ if (Asm && hasDebugInfo(MMI, MF))
endFunctionImpl(MF);
DbgValues.clear();
DbgLabels.clear();
LabelsBeforeInsn.clear();
LabelsAfterInsn.clear();
+ InstOrdering.clear();
}
void DebugHandlerBase::beginBasicBlock(const MachineBasicBlock &MBB) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 11ed1062f77e..c20ac6040aef 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -81,8 +81,9 @@ void DwarfCFIException::endModule() {
}
}
-static MCSymbol *getExceptionSym(AsmPrinter *Asm) {
- return Asm->getCurExceptionSym();
+static MCSymbol *getExceptionSym(AsmPrinter *Asm,
+ const MachineBasicBlock *MBB) {
+ return Asm->getMBBExceptionSym(*MBB);
}
void DwarfCFIException::beginFunction(const MachineFunction *MF) {
@@ -161,7 +162,7 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB,
// Provide LSDA information.
if (shouldEmitLSDA)
- Asm->OutStreamer->emitCFILsda(ESP(Asm), TLOF.getLSDAEncoding());
+ Asm->OutStreamer->emitCFILsda(ESP(Asm, MBB), TLOF.getLSDAEncoding());
}
/// endFunction - Gather and emit post-function exception information.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 296c380ae550..befc4bba19a2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -12,18 +12,12 @@
#include "DwarfCompileUnit.h"
#include "AddressPool.h"
-#include "DwarfDebug.h"
#include "DwarfExpression.h"
-#include "DwarfUnit.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DIE.h"
-#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
@@ -32,22 +26,16 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/MC/MachineLocation.h"
-#include "llvm/Support/Casting.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include <algorithm>
-#include <cassert>
-#include <cstdint>
#include <iterator>
-#include <memory>
#include <string>
#include <utility>
@@ -117,7 +105,7 @@ unsigned DwarfCompileUnit::getOrCreateSourceID(const DIFile *File) {
return Asm->OutStreamer->emitDwarfFileDirective(0, "", "", None, None,
CUID);
return Asm->OutStreamer->emitDwarfFileDirective(
- 0, File->getDirectory(), File->getFilename(), getMD5AsBytes(File),
+ 0, File->getDirectory(), File->getFilename(), DD->getMD5AsBytes(File),
File->getSource(), CUID);
}
@@ -260,7 +248,9 @@ void DwarfCompileUnit::addLocationAttribute(
: dwarf::DW_OP_const8u);
// 2) containing the (relocated) offset of the TLS variable
// within the module's TLS block.
- addExpr(*Loc, dwarf::DW_FORM_udata,
+ addExpr(*Loc,
+ PointerSize == 4 ? dwarf::DW_FORM_data4
+ : dwarf::DW_FORM_data8,
Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
} else {
addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
@@ -432,7 +422,10 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
// FIXME: duplicated from Target/WebAssembly/WebAssembly.h
// don't want to depend on target specific headers in this code?
const unsigned TI_GLOBAL_RELOC = 3;
- if (FrameBase.Location.WasmLoc.Kind == TI_GLOBAL_RELOC) {
+ // FIXME: when writing dwo, we need to avoid relocations. Probably
+ // the "right" solution is to treat globals the way func and data symbols
+ // are (with entries in .debug_addr).
+ if (FrameBase.Location.WasmLoc.Kind == TI_GLOBAL_RELOC && !isDwoUnit()) {
// These need to be relocatable.
assert(FrameBase.Location.WasmLoc.Index == 0); // Only SP so far.
auto SPSym = cast<MCSymbolWasm>(
@@ -449,8 +442,8 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
true});
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_WASM_location);
- addSInt(*Loc, dwarf::DW_FORM_sdata, FrameBase.Location.WasmLoc.Kind);
- addLabel(*Loc, dwarf::DW_FORM_udata, SPSym);
+ addSInt(*Loc, dwarf::DW_FORM_sdata, TI_GLOBAL_RELOC);
+ addLabel(*Loc, dwarf::DW_FORM_data4, SPSym);
DD->addArangeLabel(SymbolCU(this, SPSym));
addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value);
addBlock(*SPDie, dwarf::DW_AT_frame_base, Loc);
@@ -565,7 +558,12 @@ void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,
void DwarfCompileUnit::attachRangesOrLowHighPC(
DIE &Die, SmallVector<RangeSpan, 2> Ranges) {
- if (Ranges.size() == 1 || !DD->useRangesSection()) {
+ assert(!Ranges.empty());
+ if (!DD->useRangesSection() ||
+ (Ranges.size() == 1 &&
+ (!DD->alwaysUseRanges() ||
+ DD->getSectionLabel(&Ranges.front().Begin->getSection()) ==
+ Ranges.front().Begin))) {
const RangeSpan &Front = Ranges.front();
const RangeSpan &Back = Ranges.back();
attachLowHighPC(Die, Front.Begin, Back.End);
@@ -688,9 +686,9 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
// Add variable address.
- unsigned Offset = DV.getDebugLocListIndex();
- if (Offset != ~0U) {
- addLocationList(*VariableDie, dwarf::DW_AT_location, Offset);
+ unsigned Index = DV.getDebugLocListIndex();
+ if (Index != ~0U) {
+ addLocationList(*VariableDie, dwarf::DW_AT_location, Index);
auto TagOffset = DV.getDebugLocListTagOffset();
if (TagOffset)
addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
@@ -722,6 +720,13 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
addConstantFPValue(*VariableDie, DVal->getConstantFP());
} else if (DVal->isConstantInt()) {
addConstantValue(*VariableDie, DVal->getConstantInt(), DV.getType());
+ } else if (DVal->isTargetIndexLocation()) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
+ const DIBasicType *BT = dyn_cast<DIBasicType>(
+ static_cast<const Metadata *>(DV.getVariable()->getType()));
+ DwarfDebug::emitDebugLocValue(*Asm, BT, *DVal, DwarfExpr);
+ addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
}
return VariableDie;
}
@@ -737,10 +742,14 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
Register FrameReg;
const DIExpression *Expr = Fragment.Expr;
const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
- int Offset = TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg);
+ StackOffset Offset =
+ TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg);
DwarfExpr.addFragmentOffset(Expr);
+
+ auto *TRI = Asm->MF->getSubtarget().getRegisterInfo();
SmallVector<uint64_t, 8> Ops;
- DIExpression::appendOffset(Ops, Offset);
+ TRI->getOffsetOpcodes(Offset, Ops);
+
// According to
// https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
// cuda-gdb requires DW_AT_address_class for all variables to be able to
@@ -801,6 +810,10 @@ static SmallVector<const DIVariable *, 2> dependencies(DbgVariable *Var) {
return Result;
if (auto *DLVar = Array->getDataLocation())
Result.push_back(DLVar);
+ if (auto *AsVar = Array->getAssociated())
+ Result.push_back(AsVar);
+ if (auto *AlVar = Array->getAllocated())
+ Result.push_back(AlVar);
for (auto *El : Array->getElements()) {
if (auto *Subrange = dyn_cast<DISubrange>(El)) {
if (auto Count = Subrange->getCount())
@@ -815,6 +828,19 @@ static SmallVector<const DIVariable *, 2> dependencies(DbgVariable *Var) {
if (auto ST = Subrange->getStride())
if (auto *Dependency = ST.dyn_cast<DIVariable *>())
Result.push_back(Dependency);
+ } else if (auto *GenericSubrange = dyn_cast<DIGenericSubrange>(El)) {
+ if (auto Count = GenericSubrange->getCount())
+ if (auto *Dependency = Count.dyn_cast<DIVariable *>())
+ Result.push_back(Dependency);
+ if (auto LB = GenericSubrange->getLowerBound())
+ if (auto *Dependency = LB.dyn_cast<DIVariable *>())
+ Result.push_back(Dependency);
+ if (auto UB = GenericSubrange->getUpperBound())
+ if (auto *Dependency = UB.dyn_cast<DIVariable *>())
+ Result.push_back(Dependency);
+ if (auto ST = GenericSubrange->getStride())
+ if (auto *Dependency = ST.dyn_cast<DIVariable *>())
+ Result.push_back(Dependency);
}
}
return Result;
@@ -996,7 +1022,7 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
}
bool DwarfCompileUnit::useGNUAnalogForDwarf5Feature() const {
- return DD->getDwarfVersion() == 4 && DD->tuneForGDB();
+ return DD->getDwarfVersion() == 4 && !DD->tuneForLLDB();
}
dwarf::Tag DwarfCompileUnit::getDwarf5OrGNUTag(dwarf::Tag Tag) const {
@@ -1352,11 +1378,9 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
/// Add a Dwarf loclistptr attribute data and value.
void DwarfCompileUnit::addLocationList(DIE &Die, dwarf::Attribute Attribute,
unsigned Index) {
- dwarf::Form Form = dwarf::DW_FORM_data4;
- if (DD->getDwarfVersion() == 4)
- Form =dwarf::DW_FORM_sec_offset;
- if (DD->getDwarfVersion() >= 5)
- Form =dwarf::DW_FORM_loclistx;
+ dwarf::Form Form = (DD->getDwarfVersion() >= 5)
+ ? dwarf::DW_FORM_loclistx
+ : DD->getDwarfSectionOffsetForm();
Die.addValue(DIEValueAllocator, Attribute, Form, DIELocList(Index));
}
@@ -1417,8 +1441,8 @@ void DwarfCompileUnit::addAddrTableBase() {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
MCSymbol *Label = DD->getAddressPool().getLabel();
addSectionLabel(getUnitDie(),
- getDwarfVersion() >= 5 ? dwarf::DW_AT_addr_base
- : dwarf::DW_AT_GNU_addr_base,
+ DD->getDwarfVersion() >= 5 ? dwarf::DW_AT_addr_base
+ : dwarf::DW_AT_GNU_addr_base,
Label, TLOF.getDwarfAddrSection()->getBeginSymbol());
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 4ccd8c96dd0d..6d8186a5ee2b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -22,7 +22,6 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
-#include "llvm/CodeGen/DIE.h"
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/Support/Casting.h"
@@ -34,6 +33,9 @@
namespace llvm {
class AsmPrinter;
+class DIE;
+class DIELoc;
+class DIEValueList;
class DwarfFile;
class GlobalVariable;
class MCExpr;
@@ -55,7 +57,7 @@ class DwarfCompileUnit final : public DwarfUnit {
DwarfCompileUnit *Skeleton = nullptr;
/// The start of the unit within its section.
- MCSymbol *LabelBegin;
+ MCSymbol *LabelBegin = nullptr;
/// The start of the unit macro info within macro section.
MCSymbol *MacroLabelBegin;
@@ -287,8 +289,8 @@ public:
return DwarfUnit::getHeaderSize() + DWOIdSize;
}
unsigned getLength() {
- return sizeof(uint32_t) + // Length field
- getHeaderSize() + getUnitDie().getSize();
+ return Asm->getUnitLengthFieldByteSize() + // Length field
+ getHeaderSize() + getUnitDie().getSize();
}
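The distinction matters because a DWARF32 unit-length field occupies 4 bytes while a DWARF64 one occupies 12 (the 0xffffffff escape followed by an 8-byte length), so getUnitLengthFieldByteSize() returns 4 or 12 and the old hard-coded sizeof(uint32_t) undercounted every DWARF64 unit by 8 bytes.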
void emitHeader(bool UseOffsets) override;
@@ -297,7 +299,7 @@ public:
void addAddrTableBase();
MCSymbol *getLabelBegin() const {
- assert(getSection());
+ assert(LabelBegin && "LabelBegin is not initialized");
return LabelBegin;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 45ed5256deb9..462682743c6a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -13,30 +13,18 @@
#include "DwarfDebug.h"
#include "ByteStreamer.h"
#include "DIEHash.h"
-#include "DebugLocEntry.h"
-#include "DebugLocStream.h"
#include "DwarfCompileUnit.h"
#include "DwarfExpression.h"
-#include "DwarfFile.h"
#include "DwarfUnit.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/BinaryFormat/Dwarf.h"
-#include "llvm/CodeGen/AccelTable.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DIE.h"
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -46,14 +34,11 @@
#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
@@ -71,15 +56,10 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
#include <algorithm>
-#include <cassert>
#include <cstddef>
-#include <cstdint>
#include <iterator>
#include <string>
-#include <utility>
-#include <vector>
using namespace llvm;
@@ -87,18 +67,10 @@ using namespace llvm;
STATISTIC(NumCSParams, "Number of dbg call site params created");
-static cl::opt<bool>
-DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden,
- cl::desc("Disable debug info printing"));
-
static cl::opt<bool> UseDwarfRangesBaseAddressSpecifier(
"use-dwarf-ranges-base-address-specifier", cl::Hidden,
cl::desc("Use base address specifiers in debug_ranges"), cl::init(false));
-static cl::opt<bool> EmitDwarfDebugEntryValues(
- "emit-debug-entry-values", cl::Hidden,
- cl::desc("Emit the debug entry values"), cl::init(false));
-
static cl::opt<bool> GenerateARangeSection("generate-arange-section",
cl::Hidden,
cl::desc("Generate dwarf aranges"),
@@ -151,6 +123,18 @@ static cl::opt<DefaultOnOff> DwarfSectionsAsReferences(
clEnumVal(Enable, "Enabled"), clEnumVal(Disable, "Disabled")),
cl::init(Default));
+static cl::opt<bool>
+ UseGNUDebugMacro("use-gnu-debug-macro", cl::Hidden,
+ cl::desc("Emit the GNU .debug_macro format with DWARF <5"),
+ cl::init(false));
+
+static cl::opt<DefaultOnOff> DwarfOpConvert(
+ "dwarf-op-convert", cl::Hidden,
+ cl::desc("Enable use of the DWARFv5 DW_OP_convert operator"),
+ cl::values(clEnumVal(Default, "Default for platform"),
+ clEnumVal(Enable, "Enabled"), clEnumVal(Disable, "Disabled")),
+ cl::init(Default));
+
enum LinkageNameOption {
DefaultLinkageNames,
AllLinkageNames,
@@ -167,19 +151,23 @@ static cl::opt<LinkageNameOption>
"Abstract subprograms")),
cl::init(DefaultLinkageNames));
-static cl::opt<unsigned> LocationAnalysisSizeLimit(
- "singlevarlocation-input-bb-limit",
- cl::desc("Maximum block size to analyze for single-location variables"),
- cl::init(30000), cl::Hidden);
+static cl::opt<DwarfDebug::MinimizeAddrInV5> MinimizeAddrInV5Option(
+ "minimize-addr-in-v5", cl::Hidden,
+ cl::desc("Always use DW_AT_ranges in DWARFv5 whenever it could allow more "
+ "address pool entry sharing to reduce relocations/object size"),
+ cl::values(clEnumValN(DwarfDebug::MinimizeAddrInV5::Default, "Default",
+ "Default address minimization strategy"),
+ clEnumValN(DwarfDebug::MinimizeAddrInV5::Ranges, "Ranges",
+ "Use rnglists for contiguous ranges if that allows "
+ "using a pre-existing base address"),
+ clEnumValN(DwarfDebug::MinimizeAddrInV5::Disabled, "Disabled",
+                        "Disable address minimization")),
+ cl::init(DwarfDebug::MinimizeAddrInV5::Default));
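Like the neighboring options, the strategy can be forced on the command line; a sketch using the value spellings from the clEnumValN entries above:

$ llc -minimize-addr-in-v5=Ranges -filetype=obj foo.ll
$ llc -minimize-addr-in-v5=Disabled -filetype=obj foo.ll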
-static const char *const DWARFGroupName = "dwarf";
-static const char *const DWARFGroupDescription = "DWARF Emission";
-static const char *const DbgTimerName = "writer";
-static const char *const DbgTimerDescription = "DWARF Debug Writer";
static constexpr unsigned ULEB128PadSize = 4;
void DebugLocDwarfExpression::emitOp(uint8_t Op, const char *Comment) {
- getActiveStreamer().EmitInt8(
+ getActiveStreamer().emitInt8(
Op, Comment ? Twine(Comment) + " " + dwarf::OperationEncodingString(Op)
: dwarf::OperationEncodingString(Op));
}
@@ -193,7 +181,7 @@ void DebugLocDwarfExpression::emitUnsigned(uint64_t Value) {
}
void DebugLocDwarfExpression::emitData1(uint8_t Value) {
- getActiveStreamer().EmitInt8(Value, Twine(Value));
+ getActiveStreamer().emitInt8(Value, Twine(Value));
}
void DebugLocDwarfExpression::emitBaseTypeRef(uint64_t Idx) {
@@ -202,7 +190,7 @@ void DebugLocDwarfExpression::emitBaseTypeRef(uint64_t Idx) {
}
bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
- unsigned MachineReg) {
+ llvm::Register MachineReg) {
// This information is not available while emitting .debug_loc entries.
return false;
}
@@ -227,7 +215,7 @@ void DebugLocDwarfExpression::commitTemporaryBuffer() {
const char *Comment = (Byte.index() < TmpBuf->Comments.size())
? TmpBuf->Comments[Byte.index()].c_str()
: "";
- OutBS.EmitInt8(Byte.value(), Comment);
+ OutBS.emitInt8(Byte.value(), Comment);
}
TmpBuf->Bytes.clear();
TmpBuf->Comments.clear();
@@ -242,8 +230,8 @@ static DbgValueLoc getDebugLocValue(const MachineInstr *MI) {
const DIExpression *Expr = MI->getDebugExpression();
assert(MI->getNumOperands() == 4);
if (MI->getDebugOperand(0).isReg()) {
- auto RegOp = MI->getDebugOperand(0);
- auto Op1 = MI->getDebugOffset();
+ const auto &RegOp = MI->getDebugOperand(0);
+ const auto &Op1 = MI->getDebugOffset();
// If the second operand is an immediate, this is a
// register-indirect address.
assert((!Op1.isImm() || (Op1.getImm() == 0)) && "unexpected offset");
@@ -251,7 +239,7 @@ static DbgValueLoc getDebugLocValue(const MachineInstr *MI) {
return DbgValueLoc(Expr, MLoc);
}
if (MI->getDebugOperand(0).isTargetIndex()) {
- auto Op = MI->getDebugOperand(0);
+ const auto &Op = MI->getDebugOperand(0);
return DbgValueLoc(Expr,
TargetIndexLocation(Op.getIndex(), Op.getOffset()));
}
@@ -354,7 +342,7 @@ static AccelTableKind computeAccelTableKind(unsigned DwarfVersion,
return AccelTableKind::None;
}
-DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
+DwarfDebug::DwarfDebug(AsmPrinter *A)
: DebugHandlerBase(A), DebugLocs(A->OutStreamer->isVerboseAsm()),
InfoHolder(A, "info_string", DIEValueAllocator),
SkeletonHolder(A, "skel_string", DIEValueAllocator),
@@ -397,6 +385,11 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
DwarfVersion =
TT.isNVPTX() ? 2 : (DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION);
+ bool Dwarf64 = Asm->TM.Options.MCOptions.Dwarf64 &&
+ DwarfVersion >= 3 && // DWARF64 was introduced in DWARFv3.
+ TT.isArch64Bit() && // DWARF64 requires 64-bit relocations.
+ TT.isOSBinFormatELF(); // Support only ELF for now.
+
UseRangesSection = !NoDwarfRangesSection && !TT.isNVPTX();
// Use sections as references. Force for NVPTX.
@@ -406,8 +399,9 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
UseSectionsAsReferences = DwarfSectionsAsReferences == Enable;
// Don't generate type units for unsupported object file formats.
- GenerateTypeUnits =
- A->TM.getTargetTriple().isOSBinFormatELF() && GenerateDwarfTypeUnits;
+ GenerateTypeUnits = (A->TM.getTargetTriple().isOSBinFormatELF() ||
+ A->TM.getTargetTriple().isOSBinFormatWasm()) &&
+ GenerateDwarfTypeUnits;
TheAccelTableKind = computeAccelTableKind(
DwarfVersion, GenerateTypeUnits, DebuggerTuning, A->TM.getTargetTriple());
@@ -430,11 +424,31 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
// Emit call-site-param debug info for GDB and LLDB, if the target supports
// the debug entry values feature. It can also be enabled explicitly.
- EmitDebugEntryValues = (Asm->TM.Options.ShouldEmitDebugEntryValues() &&
- (tuneForGDB() || tuneForLLDB())) ||
- EmitDwarfDebugEntryValues;
+ EmitDebugEntryValues = Asm->TM.Options.ShouldEmitDebugEntryValues();
+
+ // It is unclear if the GCC .debug_macro extension is well-specified
+ // for split DWARF. For now, do not allow LLVM to emit it.
+ UseDebugMacroSection =
+ DwarfVersion >= 5 || (UseGNUDebugMacro && !useSplitDwarf());
+ if (DwarfOpConvert == Default)
+    EnableOpConvert = !((tuneForGDB() && useSplitDwarf()) ||
+                        (tuneForLLDB() && !TT.isOSBinFormatMachO()));
+ else
+ EnableOpConvert = (DwarfOpConvert == Enable);
+
+  // Split DWARF could reduce object size significantly by trading slightly
+  // larger range list encodings for reduced address pool usage.
+ if (DwarfVersion >= 5) {
+ MinimizeAddr = MinimizeAddrInV5Option;
+ // FIXME: In the future, enable this by default for Split DWARF where the
+ // tradeoff is more pronounced due to being able to offload the range
+ // lists to the dwo file and shrink object files/reduce relocations there.
+ if (MinimizeAddr == MinimizeAddrInV5::Default)
+ MinimizeAddr = MinimizeAddrInV5::Disabled;
+ }
Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion);
+ Asm->OutStreamer->getContext().setDwarfFormat(Dwarf64 ? dwarf::DWARF64
+ : dwarf::DWARF32);
}
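MCOptions.Dwarf64 only takes effect when all three guards hold (DWARFv3 or later, a 64-bit target, ELF). Assuming a driver that exposes the option (newer clang spells it -gdwarf64; treat that spelling as an assumption), a request might look like this sketch; on a 32-bit or non-ELF target the constructor silently stays on DWARF32:

$ clang --target=x86_64-unknown-linux-gnu -g -gdwarf64 -c big.c    # DWARF64 honored
$ clang --target=armv7-unknown-linux-gnueabi -g -gdwarf64 -c big.c # falls back to DWARF32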
// Define out of line so we don't have to include DwarfUnit.h in DwarfDebug.h.
@@ -583,7 +597,7 @@ static const DIExpression *combineDIExpressions(const DIExpression *Original,
std::vector<uint64_t> Elts = Addition->getElements().vec();
// Avoid multiple DW_OP_stack_values.
if (Original->isImplicit() && Addition->isImplicit())
- erase_if(Elts, [](uint64_t Op) { return Op == dwarf::DW_OP_stack_value; });
+ erase_value(Elts, dwarf::DW_OP_stack_value);
const DIExpression *CombinedExpr =
(Elts.size() > 0) ? DIExpression::append(Original, Elts) : Original;
return CombinedExpr;
@@ -709,11 +723,11 @@ static void interpretValues(const MachineInstr *CurMI,
ForwardedRegWorklist[ParamFwdReg], Params);
} else if (ParamValue->first.isReg()) {
Register RegLoc = ParamValue->first.getReg();
- unsigned SP = TLI.getStackPointerRegisterToSaveRestore();
+ Register SP = TLI.getStackPointerRegisterToSaveRestore();
Register FP = TRI.getFrameRegister(*MF);
bool IsSPorFP = (RegLoc == SP) || (RegLoc == FP);
if (TRI.isCalleeSavedPhysReg(RegLoc, *MF) || IsSPorFP) {
- MachineLocation MLoc(RegLoc, /*IsIndirect=*/IsSPorFP);
+ MachineLocation MLoc(RegLoc, /*Indirect=*/IsSPorFP);
finishCallSiteParams(MLoc, ParamValue->second,
ForwardedRegWorklist[ParamFwdReg], Params);
} else {
@@ -797,6 +811,11 @@ static void collectCallSiteParameters(const MachineInstr *CallMI,
(void)InsertedReg;
}
+ // Do not emit CSInfo for undef forwarding registers.
+ for (auto &MO : CallMI->uses())
+ if (MO.isReg() && MO.isUndef())
+ ForwardedRegWorklist.erase(MO.getReg());
+
// We erase, from the ForwardedRegWorklist, those forwarding registers for
// which we successfully describe a loaded value (by using
// the describeLoadedValue()). For those remaining arguments in the working
@@ -1071,9 +1090,8 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
// compilation directory.
if (!Asm->OutStreamer->hasRawTextSupport() || SingleCU)
Asm->OutStreamer->emitDwarfFile0Directive(
- CompilationDir, DIUnit->getFilename(),
- NewCU.getMD5AsBytes(DIUnit->getFile()), DIUnit->getSource(),
- NewCU.getUniqueID());
+ CompilationDir, DIUnit->getFilename(), getMD5AsBytes(DIUnit->getFile()),
+ DIUnit->getSource(), NewCU.getUniqueID());
if (useSplitDwarf()) {
NewCU.setSkeleton(constructSkeletonCU(NewCU));
@@ -1126,21 +1144,17 @@ sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) {
// Emit all Dwarf sections that should come prior to the content. Create
// global DIEs and emit initial debug info sections. This is invoked by
// the target AsmPrinter.
-void DwarfDebug::beginModule() {
- NamedRegionTimer T(DbgTimerName, DbgTimerDescription, DWARFGroupName,
- DWARFGroupDescription, TimePassesIsEnabled);
- if (DisableDebugInfoPrinting) {
- MMI->setDebugInfoAvailability(false);
- return;
- }
+void DwarfDebug::beginModule(Module *M) {
+ DebugHandlerBase::beginModule(M);
- const Module *M = MMI->getModule();
+ if (!Asm || !MMI->hasDebugInfo())
+ return;
unsigned NumDebugCUs = std::distance(M->debug_compile_units_begin(),
M->debug_compile_units_end());
- // Tell MMI whether we have debug info.
- assert(MMI->hasDebugInfo() == (NumDebugCUs > 0) &&
- "DebugInfoAvailabilty initialized unexpectedly");
+ assert(NumDebugCUs > 0 && "Asm unexpectedly initialized");
+ assert(MMI->hasDebugInfo() &&
+         "DebugInfoAvailability unexpectedly not initialized");
SingleCU = NumDebugCUs == 1;
DenseMap<DIGlobalVariable *, SmallVector<DwarfCompileUnit::GlobalExpr, 1>>
GVMap;
@@ -1292,7 +1306,7 @@ void DwarfDebug::finalizeModuleInfo() {
Asm->TM.Options.MCOptions.SplitDwarfFile);
// Emit a unique identifier for this CU.
uint64_t ID =
- DIEHash(Asm).computeCUSignature(DWOName, TheCU.getUnitDie());
+ DIEHash(Asm, &TheCU).computeCUSignature(DWOName, TheCU.getUnitDie());
if (getDwarfVersion() >= 5) {
TheCU.setDWOId(ID);
SkCU->setDWOId(ID);
@@ -1353,15 +1367,18 @@ void DwarfDebug::finalizeModuleInfo() {
// If compile Unit has macros, emit "DW_AT_macro_info/DW_AT_macros"
// attribute.
if (CUNode->getMacros()) {
- if (getDwarfVersion() >= 5) {
+ if (UseDebugMacroSection) {
if (useSplitDwarf())
TheCU.addSectionDelta(
TheCU.getUnitDie(), dwarf::DW_AT_macros, U.getMacroLabelBegin(),
TLOF.getDwarfMacroDWOSection()->getBeginSymbol());
- else
- U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_macros,
- U.getMacroLabelBegin(),
+ else {
+ dwarf::Attribute MacrosAttr = getDwarfVersion() >= 5
+ ? dwarf::DW_AT_macros
+ : dwarf::DW_AT_GNU_macros;
+ U.addSectionLabel(U.getUnitDie(), MacrosAttr, U.getMacroLabelBegin(),
TLOF.getDwarfMacroSection()->getBeginSymbol());
+ }
} else {
if (useSplitDwarf())
TheCU.addSectionDelta(
@@ -1398,9 +1415,8 @@ void DwarfDebug::endModule() {
}
// If we aren't actually generating debug info (check beginModule -
- // conditionalized on !DisableDebugInfoPrinting and the presence of the
- // llvm.dbg.cu metadata node)
- if (!MMI->hasDebugInfo())
+ // conditionalized on the presence of the llvm.dbg.cu metadata node)
+ if (!Asm || !MMI->hasDebugInfo())
return;
// Finalize the debug info for the module.
@@ -1532,7 +1548,8 @@ void DwarfDebug::collectVariableInfoFromMFTable(
/// either open or otherwise rolls off the end of the scope.
static bool validThroughout(LexicalScopes &LScopes,
const MachineInstr *DbgValue,
- const MachineInstr *RangeEnd) {
+ const MachineInstr *RangeEnd,
+ const InstructionOrdering &Ordering) {
assert(DbgValue->getDebugLoc() && "DBG_VALUE without a debug location");
auto MBB = DbgValue->getParent();
auto DL = DbgValue->getDebugLoc();
@@ -1544,34 +1561,30 @@ static bool validThroughout(LexicalScopes &LScopes,
if (LSRange.size() == 0)
return false;
-
- // Determine if the DBG_VALUE is valid at the beginning of its lexical block.
const MachineInstr *LScopeBegin = LSRange.front().first;
- // Early exit if the lexical scope begins outside of the current block.
- if (LScopeBegin->getParent() != MBB)
- return false;
-
- // If there are instructions belonging to our scope in another block, and
- // we're not a constant (see DWARF2 comment below), then we can't be
- // validThroughout.
- const MachineInstr *LScopeEnd = LSRange.back().second;
- if (RangeEnd && LScopeEnd->getParent() != MBB)
- return false;
-
- MachineBasicBlock::const_reverse_iterator Pred(DbgValue);
- for (++Pred; Pred != MBB->rend(); ++Pred) {
- if (Pred->getFlag(MachineInstr::FrameSetup))
- break;
- auto PredDL = Pred->getDebugLoc();
- if (!PredDL || Pred->isMetaInstruction())
- continue;
- // Check whether the instruction preceding the DBG_VALUE is in the same
- // (sub)scope as the DBG_VALUE.
- if (DL->getScope() == PredDL->getScope())
- return false;
- auto *PredScope = LScopes.findLexicalScope(PredDL);
- if (!PredScope || LScope->dominates(PredScope))
+ // If the scope starts before the DBG_VALUE then we may have a negative
+ // result. Otherwise the location is live coming into the scope and we
+ // can skip the following checks.
+ if (!Ordering.isBefore(DbgValue, LScopeBegin)) {
+ // Exit if the lexical scope begins outside of the current block.
+ if (LScopeBegin->getParent() != MBB)
return false;
+
+ MachineBasicBlock::const_reverse_iterator Pred(DbgValue);
+ for (++Pred; Pred != MBB->rend(); ++Pred) {
+ if (Pred->getFlag(MachineInstr::FrameSetup))
+ break;
+ auto PredDL = Pred->getDebugLoc();
+ if (!PredDL || Pred->isMetaInstruction())
+ continue;
+ // Check whether the instruction preceding the DBG_VALUE is in the same
+ // (sub)scope as the DBG_VALUE.
+ if (DL->getScope() == PredDL->getScope())
+ return false;
+ auto *PredScope = LScopes.findLexicalScope(PredDL);
+ if (!PredScope || LScope->dominates(PredScope))
+ return false;
+ }
}
// If the range of the DBG_VALUE is open-ended, report success.
@@ -1585,24 +1598,10 @@ static bool validThroughout(LexicalScopes &LScopes,
if (DbgValue->getDebugOperand(0).isImm() && MBB->pred_empty())
return true;
- // Now check for situations where an "open-ended" DBG_VALUE isn't enough to
- // determine eligibility for a single location, e.g. nested scopes, inlined
- // functions.
- // FIXME: For now we just handle a simple (but common) case where the scope
- // is contained in MBB. We could be smarter here.
- //
- // At this point we know that our scope ends in MBB. So, if RangeEnd exists
- // outside of the block we can ignore it; the location is just leaking outside
- // its scope.
- assert(LScopeEnd->getParent() == MBB && "Scope ends outside MBB");
- if (RangeEnd->getParent() != DbgValue->getParent())
- return true;
-
- // The location range and variable's enclosing scope are both contained within
- // MBB, test if location terminates before end of scope.
- for (auto I = RangeEnd->getIterator(); I != MBB->end(); ++I)
- if (&*I == LScopeEnd)
- return false;
+ // Test if the location terminates before the end of the scope.
+ const MachineInstr *LScopeEnd = LSRange.back().second;
+ if (Ordering.isBefore(RangeEnd, LScopeEnd))
+ return false;
// There's a single location which starts at the scope start, and ends at or
// after the scope end.
@@ -1642,10 +1641,8 @@ static bool validThroughout(LexicalScopes &LScopes,
// [1-3) [(reg0, fragment 0, 32), (reg1, fragment 32, 32)]
// [3-4) [(reg1, fragment 32, 32), (123, fragment 64, 32)]
// [4-) [(@g, fragment 0, 96)]
-bool DwarfDebug::buildLocationList(
- SmallVectorImpl<DebugLocEntry> &DebugLoc,
- const DbgValueHistoryMap::Entries &Entries,
- DenseSet<const MachineBasicBlock *> &VeryLargeBlocks) {
+bool DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
+ const DbgValueHistoryMap::Entries &Entries) {
using OpenRange =
std::pair<DbgValueHistoryMap::EntryIndex, DbgValueLoc>;
SmallVector<OpenRange, 4> OpenRanges;
@@ -1658,9 +1655,7 @@ bool DwarfDebug::buildLocationList(
// Remove all values that are no longer live.
size_t Index = std::distance(EB, EI);
- auto Last =
- remove_if(OpenRanges, [&](OpenRange &R) { return R.first <= Index; });
- OpenRanges.erase(Last, OpenRanges.end());
+ erase_if(OpenRanges, [&](OpenRange &R) { return R.first <= Index; });
// If we are dealing with a clobbering entry, this iteration will result in
// a location list entry starting after the clobbering instruction.
@@ -1741,14 +1736,8 @@ bool DwarfDebug::buildLocationList(
DebugLoc.pop_back();
}
- // If there's a single entry, safe for a single location, and not part of
- // an over-sized basic block, then ask validThroughout whether this
- // location can be represented as a single variable location.
- if (DebugLoc.size() != 1 || !isSafeForSingleLocation)
- return false;
- if (VeryLargeBlocks.count(StartDebugMI->getParent()))
- return false;
- return validThroughout(LScopes, StartDebugMI, EndMI);
+ return DebugLoc.size() == 1 && isSafeForSingleLocation &&
+ validThroughout(LScopes, StartDebugMI, EndMI, getInstOrdering());
}
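The rewritten validThroughout above consults a precomputed InstructionOrdering instead of re-walking blocks. Conceptually, the ordering numbers every instruction of the function once, so isBefore degenerates to an O(1) index comparison. A self-contained sketch of that idea, with illustrative names rather than the LLVM class:

#include <unordered_map>

template <typename FunctionT> class OrderingSketch {
  std::unordered_map<const void *, unsigned> Pos; // instruction -> position

public:
  void initialize(const FunctionT &F) {
    unsigned N = 0;
    for (const auto &Block : F)      // blocks in layout order
      for (const auto &Inst : Block) // instructions in block order
        Pos[&Inst] = N++;
  }
  bool isBefore(const void *A, const void *B) const {
    return Pos.at(A) < Pos.at(B);
  }
};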
DbgEntity *DwarfDebug::createConcreteEntity(DwarfCompileUnit &TheCU,
@@ -1780,13 +1769,6 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
// Grab the variable info that was squirreled away in the MMI side-table.
collectVariableInfoFromMFTable(TheCU, Processed);
- // Identify blocks that are unreasonably sized, so that we can later
- // skip lexical scope analysis over them.
- DenseSet<const MachineBasicBlock *> VeryLargeBlocks;
- for (const auto &MBB : *CurFn)
- if (MBB.size() > LocationAnalysisSizeLimit)
- VeryLargeBlocks.insert(&MBB);
-
for (const auto &I : DbgValues) {
InlinedEntity IV = I.first;
if (Processed.count(IV))
@@ -1823,8 +1805,7 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
if (HistSize == 1 || SingleValueWithClobber) {
const auto *End =
SingleValueWithClobber ? HistoryMapEntries[1].getInstr() : nullptr;
- if (VeryLargeBlocks.count(MInsn->getParent()) == 0 &&
- validThroughout(LScopes, MInsn, End)) {
+ if (validThroughout(LScopes, MInsn, End, getInstOrdering())) {
RegVar->initializeDbgValue(MInsn);
continue;
}
@@ -1839,8 +1820,7 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
// Build the location list for this variable.
SmallVector<DebugLocEntry, 8> Entries;
- bool isValidSingleLocation =
- buildLocationList(Entries, HistoryMapEntries, VeryLargeBlocks);
+ bool isValidSingleLocation = buildLocationList(Entries, HistoryMapEntries);
// Check whether buildLocationList managed to merge all locations to one
// that is valid throughout the variable's scope. If so, produce single
@@ -1945,7 +1925,8 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
}
DebugHandlerBase::beginInstruction(MI);
- assert(CurMI);
+ if (!CurMI)
+ return;
if (NoDebug)
return;
@@ -2382,10 +2363,10 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name,
TheU = Skeleton;
// Emit the header.
- Asm->OutStreamer->AddComment("Length of Public " + Name + " Info");
MCSymbol *BeginLabel = Asm->createTempSymbol("pub" + Name + "_begin");
MCSymbol *EndLabel = Asm->createTempSymbol("pub" + Name + "_end");
- Asm->emitLabelDifference(EndLabel, BeginLabel, 4);
+ Asm->emitDwarfUnitLength(EndLabel, BeginLabel,
+ "Length of Public " + Name + " Info");
Asm->OutStreamer->emitLabel(BeginLabel);
@@ -2396,7 +2377,7 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name,
emitSectionReference(*TheU);
Asm->OutStreamer->AddComment("Compilation Unit Length");
- Asm->emitInt32(TheU->getLength());
+ Asm->emitDwarfLengthOrOffset(TheU->getLength());
// Emit the pubnames for this compilation unit.
for (const auto &GI : Globals) {
@@ -2404,7 +2385,7 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name,
const DIE *Entity = GI.second;
Asm->OutStreamer->AddComment("DIE offset");
- Asm->emitInt32(Entity->getOffset());
+ Asm->emitDwarfLengthOrOffset(Entity->getOffset());
if (GnuStyle) {
dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity);
@@ -2419,7 +2400,7 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name,
}
Asm->OutStreamer->AddComment("End Mark");
- Asm->emitInt32(0);
+ Asm->emitDwarfLengthOrOffset(0);
Asm->OutStreamer->emitLabel(EndLabel);
}
@@ -2458,7 +2439,7 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
for (auto &Op : Expr) {
assert(Op.getCode() != dwarf::DW_OP_const_type &&
"3 operand ops not yet supported");
- Streamer.EmitInt8(Op.getCode(), Comment != End ? *(Comment++) : "");
+ Streamer.emitInt8(Op.getCode(), Comment != End ? *(Comment++) : "");
Offset++;
for (unsigned I = 0; I < 2; ++I) {
if (Op.getDescription().Op[I] == Encoding::SizeNA)
@@ -2474,7 +2455,7 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
Comment++;
} else {
for (uint64_t J = Offset; J < Op.getOperandEndOffset(I); ++J)
- Streamer.EmitInt8(Data.getData()[J], Comment != End ? *(Comment++) : "");
+ Streamer.emitInt8(Data.getData()[J], Comment != End ? *(Comment++) : "");
}
Offset = Op.getOperandEndOffset(I);
}
@@ -2511,10 +2492,26 @@ void DwarfDebug::emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
TargetIndexLocation Loc = Value.getTargetIndexLocation();
// TODO: TargetIndexLocation is target-independent. Currently only the
// WebAssembly-specific encoding is supported.
+ assert(AP.TM.getTargetTriple().isWasm());
DwarfExpr.addWasmLocation(Loc.Index, static_cast<uint64_t>(Loc.Offset));
+ DwarfExpr.addExpression(std::move(ExprCursor));
+ return;
} else if (Value.isConstantFP()) {
- APInt RawBytes = Value.getConstantFP()->getValueAPF().bitcastToAPInt();
- DwarfExpr.addUnsignedConstant(RawBytes);
+ if (AP.getDwarfVersion() >= 4 && !AP.getDwarfDebug()->tuneForSCE() &&
+ !ExprCursor) {
+ DwarfExpr.addConstantFP(Value.getConstantFP()->getValueAPF(), AP);
+ return;
+ }
+ if (Value.getConstantFP()->getValueAPF().bitcastToAPInt().getBitWidth() <=
+ 64 /*bits*/)
+ DwarfExpr.addUnsignedConstant(
+ Value.getConstantFP()->getValueAPF().bitcastToAPInt());
+ else
+ LLVM_DEBUG(
+ dbgs()
+          << "Skipped DwarfExpression creation for ConstantFP of size "
+ << Value.getConstantFP()->getValueAPF().bitcastToAPInt().getBitWidth()
+ << " bits\n");
}
DwarfExpr.addExpression(std::move(ExprCursor));
}
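To make the DWARFv4 path concrete: a float variable pinned to the constant 1.5f has the IEEE-754 bit pattern 0x3FC00000, and addConstantFP is expected to encode it as an implicit value, roughly DW_OP_implicit_value (0x9e), ULEB128 length 4, payload bytes 00 00 c0 3f (little-endian). The width check beneath it exists because addUnsignedConstant only handles payloads up to 64 bits, so larger soft-float constants, such as x86 80-bit long doubles, are skipped with only a debug-log note.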
@@ -2537,7 +2534,7 @@ void DebugLocEntry::finalize(const AsmPrinter &AP,
}) && "all values are expected to be fragments");
assert(llvm::is_sorted(Values) && "fragments are expected to be sorted");
- for (auto Fragment : Values)
+ for (const auto &Fragment : Values)
DwarfDebug::emitDebugLocValue(AP, BT, Fragment, DwarfExpr);
} else {
@@ -2580,7 +2577,8 @@ static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm,
Asm->OutStreamer->emitLabel(Holder.getRnglistsTableBaseSym());
for (const RangeSpanList &List : Holder.getRangeLists())
- Asm->emitLabelDifference(List.Label, Holder.getRnglistsTableBaseSym(), 4);
+ Asm->emitLabelDifference(List.Label, Holder.getRnglistsTableBaseSym(),
+ Asm->getDwarfOffsetByteSize());
return TableEnd;
}
@@ -2599,7 +2597,8 @@ static MCSymbol *emitLoclistsTableHeader(AsmPrinter *Asm,
Asm->OutStreamer->emitLabel(DebugLocs.getSym());
for (const auto &List : DebugLocs.getLists())
- Asm->emitLabelDifference(List.Label, DebugLocs.getSym(), 4);
+ Asm->emitLabelDifference(List.Label, DebugLocs.getSym(),
+ Asm->getDwarfOffsetByteSize());
return TableEnd;
}
@@ -2881,23 +2880,23 @@ void DwarfDebug::emitDebugARanges() {
// Emit size of content not including length itself.
unsigned ContentSize =
- sizeof(int16_t) + // DWARF ARange version number
- sizeof(int32_t) + // Offset of CU in the .debug_info section
- sizeof(int8_t) + // Pointer Size (in bytes)
- sizeof(int8_t); // Segment Size (in bytes)
+ sizeof(int16_t) + // DWARF ARange version number
+ Asm->getDwarfOffsetByteSize() + // Offset of CU in the .debug_info
+ // section
+ sizeof(int8_t) + // Pointer Size (in bytes)
+ sizeof(int8_t); // Segment Size (in bytes)
unsigned TupleSize = PtrSize * 2;
// 7.20 in the Dwarf specs requires the table to be aligned to a tuple.
- unsigned Padding =
- offsetToAlignment(sizeof(int32_t) + ContentSize, Align(TupleSize));
+ unsigned Padding = offsetToAlignment(
+ Asm->getUnitLengthFieldByteSize() + ContentSize, Align(TupleSize));
ContentSize += Padding;
ContentSize += (List.size() + 1) * TupleSize;
// For each compile unit, write the list of spans it covers.
- Asm->OutStreamer->AddComment("Length of ARange Set");
- Asm->emitInt32(ContentSize);
+ Asm->emitDwarfUnitLength(ContentSize, "Length of ARange Set");
Asm->OutStreamer->AddComment("DWARF Arange version number");
Asm->emitInt16(dwarf::DW_ARANGES_VERSION);
Asm->OutStreamer->AddComment("Offset Into Debug Info Section");
@@ -2983,25 +2982,30 @@ void DwarfDebug::emitDebugRangesDWO() {
Asm->getObjFileLowering().getDwarfRnglistsDWOSection());
}
-/// Emit the header of a DWARF 5 macro section.
+/// Emit the header of a DWARF 5 macro section, or the GNU extension for
+/// DWARF 4.
static void emitMacroHeader(AsmPrinter *Asm, const DwarfDebug &DD,
- const DwarfCompileUnit &CU) {
+ const DwarfCompileUnit &CU, uint16_t DwarfVersion) {
enum HeaderFlagMask {
#define HANDLE_MACRO_FLAG(ID, NAME) MACRO_FLAG_##NAME = ID,
#include "llvm/BinaryFormat/Dwarf.def"
};
- uint8_t Flags = 0;
Asm->OutStreamer->AddComment("Macro information version");
- Asm->emitInt16(5);
- // We are setting Offset and line offset flags unconditionally here,
- // since we're only supporting DWARF32 and line offset should be mostly
- // present.
- // FIXME: Add support for DWARF64.
- Flags |= MACRO_FLAG_DEBUG_LINE_OFFSET;
- Asm->OutStreamer->AddComment("Flags: 32 bit, debug_line_offset present");
- Asm->emitInt8(Flags);
+ Asm->emitInt16(DwarfVersion >= 5 ? DwarfVersion : 4);
+  // We emit the line offset flag unconditionally here, since the line offset
+  // is almost always present.
+ if (Asm->isDwarf64()) {
+ Asm->OutStreamer->AddComment("Flags: 64 bit, debug_line_offset present");
+ Asm->emitInt8(MACRO_FLAG_OFFSET_SIZE | MACRO_FLAG_DEBUG_LINE_OFFSET);
+ } else {
+ Asm->OutStreamer->AddComment("Flags: 32 bit, debug_line_offset present");
+ Asm->emitInt8(MACRO_FLAG_DEBUG_LINE_OFFSET);
+ }
Asm->OutStreamer->AddComment("debug_line_offset");
- Asm->OutStreamer->emitSymbolValue(CU.getLineTableStartSym(), /*Size=*/4);
+ if (DD.useSplitDwarf())
+ Asm->emitDwarfLengthOrOffset(0);
+ else
+ Asm->emitDwarfSymbolReference(CU.getLineTableStartSym());
}
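For a DWARF32, version 5, non-split unit the header written here lays out as follows (field forms per DWARFv5 section 6.3.1):

  version            uhalf   = 0x0005
  flags              ubyte   = 0x02   (debug_line_offset present, 32-bit offsets)
  debug_line_offset  4 bytes -> start of this CU's line table

Under DWARF64 the flags byte becomes 0x03 (offset_size_flag also set) and the offset widens to 8 bytes; for split DWARF a literal 0 is written instead, since the .dwo file cannot carry the relocation.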
void DwarfDebug::handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U) {
@@ -3018,55 +3022,63 @@ void DwarfDebug::handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U) {
void DwarfDebug::emitMacro(DIMacro &M) {
StringRef Name = M.getName();
StringRef Value = M.getValue();
- bool UseMacro = getDwarfVersion() >= 5;
-
- if (UseMacro) {
- unsigned Type = M.getMacinfoType() == dwarf::DW_MACINFO_define
- ? dwarf::DW_MACRO_define_strx
- : dwarf::DW_MACRO_undef_strx;
- Asm->OutStreamer->AddComment(dwarf::MacroString(Type));
- Asm->emitULEB128(Type);
- Asm->OutStreamer->AddComment("Line Number");
- Asm->emitULEB128(M.getLine());
- Asm->OutStreamer->AddComment("Macro String");
- if (!Value.empty())
- Asm->emitULEB128(this->InfoHolder.getStringPool()
- .getIndexedEntry(*Asm, (Name + " " + Value).str())
- .getIndex());
- else
- // DW_MACRO_undef_strx doesn't have a value, so just emit the macro
- // string.
- Asm->emitULEB128(this->InfoHolder.getStringPool()
- .getIndexedEntry(*Asm, (Name).str())
- .getIndex());
+
+ // There should be one space between the macro name and the macro value in
+ // define entries. In undef entries, only the macro name is emitted.
+ std::string Str = Value.empty() ? Name.str() : (Name + " " + Value).str();
+
+ if (UseDebugMacroSection) {
+ if (getDwarfVersion() >= 5) {
+ unsigned Type = M.getMacinfoType() == dwarf::DW_MACINFO_define
+ ? dwarf::DW_MACRO_define_strx
+ : dwarf::DW_MACRO_undef_strx;
+ Asm->OutStreamer->AddComment(dwarf::MacroString(Type));
+ Asm->emitULEB128(Type);
+ Asm->OutStreamer->AddComment("Line Number");
+ Asm->emitULEB128(M.getLine());
+ Asm->OutStreamer->AddComment("Macro String");
+ Asm->emitULEB128(
+ InfoHolder.getStringPool().getIndexedEntry(*Asm, Str).getIndex());
+ } else {
+ unsigned Type = M.getMacinfoType() == dwarf::DW_MACINFO_define
+ ? dwarf::DW_MACRO_GNU_define_indirect
+ : dwarf::DW_MACRO_GNU_undef_indirect;
+ Asm->OutStreamer->AddComment(dwarf::GnuMacroString(Type));
+ Asm->emitULEB128(Type);
+ Asm->OutStreamer->AddComment("Line Number");
+ Asm->emitULEB128(M.getLine());
+ Asm->OutStreamer->AddComment("Macro String");
+ Asm->emitDwarfSymbolReference(
+ InfoHolder.getStringPool().getEntry(*Asm, Str).getSymbol());
+ }
} else {
Asm->OutStreamer->AddComment(dwarf::MacinfoString(M.getMacinfoType()));
Asm->emitULEB128(M.getMacinfoType());
Asm->OutStreamer->AddComment("Line Number");
Asm->emitULEB128(M.getLine());
Asm->OutStreamer->AddComment("Macro String");
- Asm->OutStreamer->emitBytes(Name);
- if (!Value.empty()) {
- // There should be one space between macro name and macro value.
- Asm->emitInt8(' ');
- Asm->OutStreamer->AddComment("Macro Value=");
- Asm->OutStreamer->emitBytes(Value);
- }
+ Asm->OutStreamer->emitBytes(Str);
Asm->emitInt8('\0');
}
}
void DwarfDebug::emitMacroFileImpl(
- DIMacroFile &F, DwarfCompileUnit &U, unsigned StartFile, unsigned EndFile,
+ DIMacroFile &MF, DwarfCompileUnit &U, unsigned StartFile, unsigned EndFile,
StringRef (*MacroFormToString)(unsigned Form)) {
Asm->OutStreamer->AddComment(MacroFormToString(StartFile));
Asm->emitULEB128(StartFile);
Asm->OutStreamer->AddComment("Line Number");
- Asm->emitULEB128(F.getLine());
+ Asm->emitULEB128(MF.getLine());
Asm->OutStreamer->AddComment("File Number");
- Asm->emitULEB128(U.getOrCreateSourceID(F.getFile()));
- handleMacroNodes(F.getElements(), U);
+ DIFile &F = *MF.getFile();
+ if (useSplitDwarf())
+ Asm->emitULEB128(getDwoLineTable(U)->getFile(
+ F.getDirectory(), F.getFilename(), getMD5AsBytes(&F),
+ Asm->OutContext.getDwarfVersion(), F.getSource()));
+ else
+ Asm->emitULEB128(U.getOrCreateSourceID(&F));
+ handleMacroNodes(MF.getElements(), U);
Asm->OutStreamer->AddComment(MacroFormToString(EndFile));
Asm->emitULEB128(EndFile);
}
@@ -3075,10 +3087,10 @@ void DwarfDebug::emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U) {
// DWARFv5 macro and DWARFv4 macinfo share some common encodings,
// so for readability/uniformity, we are explicitly emitting those.
assert(F.getMacinfoType() == dwarf::DW_MACINFO_start_file);
- bool UseMacro = getDwarfVersion() >= 5;
- if (UseMacro)
- emitMacroFileImpl(F, U, dwarf::DW_MACRO_start_file,
- dwarf::DW_MACRO_end_file, dwarf::MacroString);
+ if (UseDebugMacroSection)
+ emitMacroFileImpl(
+ F, U, dwarf::DW_MACRO_start_file, dwarf::DW_MACRO_end_file,
+ (getDwarfVersion() >= 5) ? dwarf::MacroString : dwarf::GnuMacroString);
else
emitMacroFileImpl(F, U, dwarf::DW_MACINFO_start_file,
dwarf::DW_MACINFO_end_file, dwarf::MacinfoString);
@@ -3095,8 +3107,8 @@ void DwarfDebug::emitDebugMacinfoImpl(MCSection *Section) {
continue;
Asm->OutStreamer->SwitchSection(Section);
Asm->OutStreamer->emitLabel(U.getMacroLabelBegin());
- if (getDwarfVersion() >= 5)
- emitMacroHeader(Asm, *this, U);
+ if (UseDebugMacroSection)
+ emitMacroHeader(Asm, *this, U, getDwarfVersion());
handleMacroNodes(Macros, U);
Asm->OutStreamer->AddComment("End Of Macro List Mark");
Asm->emitInt8(0);
@@ -3106,14 +3118,14 @@ void DwarfDebug::emitDebugMacinfoImpl(MCSection *Section) {
/// Emit macros into a debug macinfo/macro section.
void DwarfDebug::emitDebugMacinfo() {
auto &ObjLower = Asm->getObjFileLowering();
- emitDebugMacinfoImpl(getDwarfVersion() >= 5
+ emitDebugMacinfoImpl(UseDebugMacroSection
? ObjLower.getDwarfMacroSection()
: ObjLower.getDwarfMacinfoSection());
}
void DwarfDebug::emitDebugMacinfoDWO() {
auto &ObjLower = Asm->getObjFileLowering();
- emitDebugMacinfoImpl(getDwarfVersion() >= 5
+ emitDebugMacinfoImpl(UseDebugMacroSection
? ObjLower.getDwarfMacroDWOSection()
: ObjLower.getDwarfMacinfoDWOSection());
}
@@ -3200,7 +3212,7 @@ MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) {
const DICompileUnit *DIUnit = CU.getCUNode();
SplitTypeUnitFileTable.maybeSetRootFile(
DIUnit->getDirectory(), DIUnit->getFilename(),
- CU.getMD5AsBytes(DIUnit->getFile()), DIUnit->getSource());
+ getMD5AsBytes(DIUnit->getFile()), DIUnit->getSource());
return &SplitTypeUnitFileTable;
}
@@ -3303,14 +3315,14 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
DwarfDebug::NonTypeUnitContext::NonTypeUnitContext(DwarfDebug *DD)
: DD(DD),
- TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)) {
+      TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)),
+      AddrPoolUsed(DD->AddrPool.hasBeenUsed()) {
DD->TypeUnitsUnderConstruction.clear();
- assert(TypeUnitsUnderConstruction.empty() || !DD->AddrPool.hasBeenUsed());
+ DD->AddrPool.resetUsedFlag();
}
DwarfDebug::NonTypeUnitContext::~NonTypeUnitContext() {
DD->TypeUnitsUnderConstruction = std::move(TypeUnitsUnderConstruction);
- DD->AddrPool.resetUsedFlag();
+ DD->AddrPool.resetUsedFlag(AddrPoolUsed);
}
DwarfDebug::NonTypeUnitContext DwarfDebug::enterNonTypeUnitContext() {
@@ -3375,6 +3387,15 @@ uint16_t DwarfDebug::getDwarfVersion() const {
return Asm->OutStreamer->getContext().getDwarfVersion();
}
+dwarf::Form DwarfDebug::getDwarfSectionOffsetForm() const {
+ if (Asm->getDwarfVersion() >= 4)
+ return dwarf::Form::DW_FORM_sec_offset;
+ assert((!Asm->isDwarf64() || (Asm->getDwarfVersion() == 3)) &&
+         "DWARF64 is not defined prior to DWARFv3");
+ return Asm->isDwarf64() ? dwarf::Form::DW_FORM_data8
+ : dwarf::Form::DW_FORM_data4;
+}
+
const MCSymbol *DwarfDebug::getSectionLabel(const MCSection *S) {
return SectionLabels.find(S)->second;
}
@@ -3383,3 +3404,20 @@ void DwarfDebug::insertSectionLabel(const MCSymbol *S) {
if (useSplitDwarf() || getDwarfVersion() >= 5)
AddrPool.getIndex(S);
}
+
+Optional<MD5::MD5Result> DwarfDebug::getMD5AsBytes(const DIFile *File) const {
+ assert(File);
+ if (getDwarfVersion() < 5)
+ return None;
+ Optional<DIFile::ChecksumInfo<StringRef>> Checksum = File->getChecksum();
+ if (!Checksum || Checksum->Kind != DIFile::CSK_MD5)
+ return None;
+
+ // Convert the string checksum to an MD5Result for the streamer.
+ // The verifier validates the checksum so we assume it's okay.
+ // An MD5 checksum is 16 bytes.
+ std::string ChecksumString = fromHex(Checksum->Value);
+ MD5::MD5Result CKMem;
+ std::copy(ChecksumString.begin(), ChecksumString.end(), CKMem.Bytes.data());
+ return CKMem;
+}
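The checksum arrives from the front end as 32 hex characters, and llvm::fromHex converts it to the 16 raw bytes MD5Result expects; for example the well-known MD5 of empty input, d41d8cd98f00b204e9800998ecf8427e, becomes the byte sequence d4 1d 8c d9 8f 00 b2 04 e9 80 09 98 ec f8 42 7e. The early return of None for pre-v5 DWARF reflects that the v5 line table is the first format revision able to record the checksum.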
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index ad2f2f3edd8e..df19ef458888 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -114,7 +114,7 @@ public:
///
/// Variables that have been optimized out use none of these fields.
class DbgVariable : public DbgEntity {
- /// Offset in DebugLocs.
+ /// Index of the entry list in DebugLocs.
unsigned DebugLocListIndex = ~0u;
/// DW_OP_LLVM_tag_offset value from DebugLocs.
Optional<uint8_t> DebugLocListTagOffset;
@@ -372,6 +372,23 @@ class DwarfDebug : public DebugHandlerBase {
/// Generate DWARF v4 type units.
bool GenerateTypeUnits;
+ /// Emit a .debug_macro section instead of .debug_macinfo.
+ bool UseDebugMacroSection;
+
+ /// Avoid using DW_OP_convert due to consumer incompatibilities.
+ bool EnableOpConvert;
+
+public:
+ enum class MinimizeAddrInV5 {
+ Default,
+ Disabled,
+ Ranges,
+ };
+
+private:
+ /// Force the use of DW_AT_ranges even for single-entry range lists.
+ MinimizeAddrInV5 MinimizeAddr = MinimizeAddrInV5::Disabled;
+
/// DWARF5 Experimental Options
/// @{
AccelTableKind TheAccelTableKind;
@@ -409,6 +426,9 @@ class DwarfDebug : public DebugHandlerBase {
bool SingleCU;
bool IsDarwin;
+ /// Map for tracking Fortran deferred CHARACTER lengths.
+ DenseMap<const DIStringType *, unsigned> StringTypeLocMap;
+
AddressPool AddrPool;
/// Accelerator tables.
@@ -592,10 +612,8 @@ class DwarfDebug : public DebugHandlerBase {
/// function that describe the same variable. If the resulting
/// list has only one entry that is valid for entire variable's
/// scope return true.
- bool buildLocationList(
- SmallVectorImpl<DebugLocEntry> &DebugLoc,
- const DbgValueHistoryMap::Entries &Entries,
- DenseSet<const MachineBasicBlock *> &VeryLargeBlocks);
+ bool buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
+ const DbgValueHistoryMap::Entries &Entries);
/// Collect variable information from the side table maintained by MF.
void collectVariableInfoFromMFTable(DwarfCompileUnit &TheCU,
@@ -617,13 +635,13 @@ public:
//===--------------------------------------------------------------------===//
// Main entry points.
//
- DwarfDebug(AsmPrinter *A, Module *M);
+ DwarfDebug(AsmPrinter *A);
~DwarfDebug() override;
/// Emit all Dwarf sections that should come prior to the
/// content.
- void beginModule();
+ void beginModule(Module *M) override;
/// Emit all Dwarf sections that should come after the content.
void endModule() override;
@@ -645,6 +663,7 @@ public:
class NonTypeUnitContext {
DwarfDebug *DD;
decltype(DwarfDebug::TypeUnitsUnderConstruction) TypeUnitsUnderConstruction;
+ bool AddrPoolUsed;
friend class DwarfDebug;
NonTypeUnitContext(DwarfDebug *DD);
public:
@@ -681,6 +700,12 @@ public:
/// Returns whether ranges section should be emitted.
bool useRangesSection() const { return UseRangesSection; }
+ /// Returns whether range encodings should be used for single entry range
+ /// lists.
+ bool alwaysUseRanges() const {
+ return MinimizeAddr == MinimizeAddrInV5::Ranges;
+ }
+
/// Returns whether to use sections as labels rather than temp symbols.
bool useSectionsAsReferences() const {
return UseSectionsAsReferences;
@@ -719,11 +744,21 @@ public:
return EmitDebugEntryValues;
}
+ bool useOpConvert() const {
+ return EnableOpConvert;
+ }
+
bool shareAcrossDWOCUs() const;
/// Returns the Dwarf Version.
uint16_t getDwarfVersion() const;
+ /// Returns a suitable DWARF form to represent a section offset, i.e.
+ /// * DW_FORM_sec_offset for DWARF version >= 4;
+ /// * DW_FORM_data8 for 64-bit DWARFv3;
+ /// * DW_FORM_data4 for 32-bit DWARFv3 and DWARFv2.
+ dwarf::Form getDwarfSectionOffsetForm() const;
+
/// Returns the previous CU that was being updated
const DwarfCompileUnit *getPrevCU() const { return PrevCU; }
void setPrevCU(const DwarfCompileUnit *PrevCU) { this->PrevCU = PrevCU; }
@@ -768,6 +803,16 @@ public:
return CUDieMap.lookup(Die);
}
+ unsigned getStringTypeLoc(const DIStringType *ST) const {
+ return StringTypeLocMap.lookup(ST);
+ }
+
+ void addStringTypeLoc(const DIStringType *ST, unsigned Loc) {
+ assert(ST);
+ if (Loc)
+ StringTypeLocMap[ST] = Loc;
+ }
+
/// \defgroup DebuggerTuning Predicates to tune DWARF for a given debugger.
///
/// Returns whether we are "tuning" for a given debugger.
@@ -777,13 +822,16 @@ public:
bool tuneForSCE() const { return DebuggerTuning == DebuggerKind::SCE; }
/// @}
- void addSectionLabel(const MCSymbol *Sym);
const MCSymbol *getSectionLabel(const MCSection *S);
void insertSectionLabel(const MCSymbol *S);
static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
const DbgValueLoc &Value,
DwarfExpression &DwarfExpr);
+
+ /// If the \p File has an MD5 checksum, return it as an MD5Result
+ /// allocated in the MCContext.
+ Optional<MD5::MD5Result> getMD5AsBytes(const DIFile *File) const;
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
index c2956380438f..b19b4365383f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -92,6 +92,20 @@ public:
/// Gather and emit post-function exception information.
void endFunction(const MachineFunction *) override;
};
+
+class LLVM_LIBRARY_VISIBILITY AIXException : public DwarfCFIExceptionBase {
+  /// Emit the entry for AIX's compat unwind section, which the unwinder
+  /// uses to find the location of the LSDA area and the personality routine.
+ void emitExceptionInfoTable(const MCSymbol *LSDA, const MCSymbol *PerSym);
+
+public:
+ AIXException(AsmPrinter *A);
+
+ void endModule() override {}
+ void beginFunction(const MachineFunction *MF) override {}
+
+ void endFunction(const MachineFunction *MF) override;
+};
} // End of namespace llvm
#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index d4762121d105..59ad7646ce1c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -17,14 +17,14 @@
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
-#include <cassert>
-#include <cstdint>
using namespace llvm;
+#define DEBUG_TYPE "dwarfdebug"
+
void DwarfExpression::emitConstu(uint64_t Value) {
if (Value < 32)
emitOp(dwarf::DW_OP_lit0 + Value);
@@ -97,7 +97,8 @@ void DwarfExpression::addAnd(unsigned Mask) {
}
bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI,
- unsigned MachineReg, unsigned MaxSize) {
+ llvm::Register MachineReg,
+ unsigned MaxSize) {
if (!llvm::Register::isPhysicalRegister(MachineReg)) {
if (isFrameRegister(TRI, MachineReg)) {
DwarfRegs.push_back(Register::createRegister(-1, nullptr));
@@ -219,9 +220,36 @@ void DwarfExpression::addUnsignedConstant(const APInt &Value) {
}
}
+void DwarfExpression::addConstantFP(const APFloat &APF, const AsmPrinter &AP) {
+ assert(isImplicitLocation() || isUnknownLocation());
+ APInt API = APF.bitcastToAPInt();
+ int NumBytes = API.getBitWidth() / 8;
+ if (NumBytes == 4 /*float*/ || NumBytes == 8 /*double*/) {
+ // FIXME: Add support for `long double`.
+ emitOp(dwarf::DW_OP_implicit_value);
+ emitUnsigned(NumBytes /*Size of the block in bytes*/);
+
+  // The loop below emits the value starting at the least significant byte,
+  // so we need a byte swap to get the byte order correct for a big-endian
+  // target.
+ if (AP.getDataLayout().isBigEndian())
+ API = API.byteSwap();
+
+ for (int i = 0; i < NumBytes; ++i) {
+ emitData1(API.getZExtValue() & 0xFF);
+ API = API.lshr(8);
+ }
+
+ return;
+ }
+ LLVM_DEBUG(
+ dbgs() << "Skipped DW_OP_implicit_value creation for ConstantFP of size: "
+ << API.getBitWidth() << " bits\n");
+}
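The byte-order handling above can be exercised with a host-only sketch,
assuming a plain double stands in for the APFloat/APInt pair and that
__builtin_bswap64 (a GCC/Clang builtin) stands in for APInt::byteSwap():

#include <cstdint>
#include <cstring>
#include <vector>

std::vector<uint8_t> implicitValueBytes(double V, bool TargetIsBigEndian) {
  uint64_t Bits;
  std::memcpy(&Bits, &V, sizeof(Bits)); // Bit-cast, like bitcastToAPInt().
  if (TargetIsBigEndian)
    Bits = __builtin_bswap64(Bits);     // The up-front swap from above.
  std::vector<uint8_t> Out;
  for (int I = 0; I < 8; ++I) {         // Least significant byte first.
    Out.push_back(Bits & 0xFF);
    Bits >>= 8;
  }
  return Out;
}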
+
bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
DIExpressionCursor &ExprCursor,
- unsigned MachineReg,
+ llvm::Register MachineReg,
unsigned FragmentOffsetInBits) {
auto Fragment = ExprCursor.getFragmentInfo();
if (!addMachineReg(TRI, MachineReg, Fragment ? Fragment->SizeInBits : ~1U)) {
@@ -498,6 +526,7 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
case dwarf::DW_OP_not:
case dwarf::DW_OP_dup:
case dwarf::DW_OP_push_object_address:
+ case dwarf::DW_OP_over:
emitOp(OpNum);
break;
case dwarf::DW_OP_deref:
@@ -513,10 +542,15 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
assert(!isRegisterLocation());
emitConstu(Op->getArg(0));
break;
+ case dwarf::DW_OP_consts:
+ assert(!isRegisterLocation());
+ emitOp(dwarf::DW_OP_consts);
+ emitSigned(Op->getArg(0));
+ break;
case dwarf::DW_OP_LLVM_convert: {
unsigned BitSize = Op->getArg(0);
dwarf::TypeKind Encoding = static_cast<dwarf::TypeKind>(Op->getArg(1));
- if (DwarfVersion >= 5) {
+ if (DwarfVersion >= 5 && CU.getDwarfDebug().useOpConvert()) {
emitOp(dwarf::DW_OP_convert);
// If targeting a location-list; simply emit the index into the raw
// byte stream as ULEB128, DwarfDebug::emitDebugLocEntry has been
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
index 757b17511453..8fca9f5a630b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -218,7 +218,7 @@ protected:
/// Return whether the given machine register is the frame register in the
/// current function.
virtual bool isFrameRegister(const TargetRegisterInfo &TRI,
- unsigned MachineReg) = 0;
+ llvm::Register MachineReg) = 0;
/// Emit a DW_OP_reg operation. Note that this is only legal inside a DWARF
/// register location description.
@@ -245,7 +245,7 @@ protected:
/// multiple subregisters that alias the register.
///
/// \return false if no DWARF register exists for MachineReg.
- bool addMachineReg(const TargetRegisterInfo &TRI, unsigned MachineReg,
+ bool addMachineReg(const TargetRegisterInfo &TRI, llvm::Register MachineReg,
unsigned MaxSize = ~1U);
/// Emit a DW_OP_piece or DW_OP_bit_piece operation for a variable fragment.
@@ -299,6 +299,9 @@ public:
/// Emit an unsigned constant.
void addUnsignedConstant(const APInt &Value);
+  /// Emit a floating point constant.
+ void addConstantFP(const APFloat &Value, const AsmPrinter &AP);
+
/// Lock this down to become a memory location description.
void setMemoryLocationKind() {
assert(isUnknownLocation());
@@ -322,7 +325,8 @@ public:
/// \return false if no DWARF register exists
/// for MachineReg.
bool addMachineRegExpression(const TargetRegisterInfo &TRI,
- DIExpressionCursor &Expr, unsigned MachineReg,
+ DIExpressionCursor &Expr,
+ llvm::Register MachineReg,
unsigned FragmentOffsetInBits = 0);
/// Begin emission of an entry value dwarf operation. The entry value's
@@ -385,7 +389,7 @@ class DebugLocDwarfExpression final : public DwarfExpression {
void commitTemporaryBuffer() override;
bool isFrameRegister(const TargetRegisterInfo &TRI,
- unsigned MachineReg) override;
+ llvm::Register MachineReg) override;
public:
DebugLocDwarfExpression(unsigned DwarfVersion, BufferByteStreamer &BS,
@@ -415,7 +419,7 @@ class DIEDwarfExpression final : public DwarfExpression {
void commitTemporaryBuffer() override;
bool isFrameRegister(const TargetRegisterInfo &TRI,
- unsigned MachineReg) override;
+ llvm::Register MachineReg) override;
public:
DIEDwarfExpression(const AsmPrinter &AP, DwarfCompileUnit &CU, DIELoc &DIE);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index 812e6383288f..838e1c9a10be 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -10,10 +10,9 @@
#include "DwarfCompileUnit.h"
#include "DwarfDebug.h"
#include "DwarfUnit.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/DIE.h"
#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/MC/MCStreamer.h"
#include <algorithm>
#include <cstdint>
@@ -59,7 +58,7 @@ void DwarfFile::emitUnit(DwarfUnit *TheU, bool UseOffsets) {
// Compute the size and offset for each DIE.
void DwarfFile::computeSizeAndOffsets() {
// Offset from the first CU in the debug info section is 0 initially.
- unsigned SecOffset = 0;
+ uint64_t SecOffset = 0;
// Iterate over each compile unit and set the size and offsets for each
// DIE within each compile unit. All offsets are CU relative.
@@ -75,12 +74,15 @@ void DwarfFile::computeSizeAndOffsets() {
TheU->setDebugSectionOffset(SecOffset);
SecOffset += computeSizeAndOffsetsForUnit(TheU.get());
}
+ if (SecOffset > UINT32_MAX && !Asm->isDwarf64())
+ report_fatal_error("The generated debug information is too large "
+ "for the 32-bit DWARF format.");
}
unsigned DwarfFile::computeSizeAndOffsetsForUnit(DwarfUnit *TheU) {
// CU-relative offset is reset to 0 here.
- unsigned Offset = sizeof(int32_t) + // Length of Unit Info
- TheU->getHeaderSize(); // Unit-specific headers
+ unsigned Offset = Asm->getUnitLengthFieldByteSize() + // Length of Unit Info
+ TheU->getHeaderSize(); // Unit-specific headers
// The return value here is CU-relative, after laying out
// all of the CU DIE.
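For context on the two changes above: the initial-length field is 4 bytes in
32-bit DWARF and 12 bytes in DWARF64 (a 0xffffffff escape marker followed by
an 8-byte length), which is what getUnitLengthFieldByteSize() abstracts. A
minimal sketch of the sizing and of the new overflow condition, under that
assumption:

#include <cstdint>

unsigned unitLengthFieldByteSize(bool IsDwarf64) {
  return IsDwarf64 ? 12 : 4; // 4-byte escape + 8-byte length for DWARF64.
}

bool offsetFitsFormat(uint64_t SecOffset, bool IsDwarf64) {
  // The condition computeSizeAndOffsets() now reports as a fatal error
  // when violated for 32-bit DWARF.
  return IsDwarf64 || SecOffset <= UINT32_MAX;
}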
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
index cf293d7534d0..79a6ce7801b7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -14,7 +14,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/DIE.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/Support/Allocator.h"
#include <map>
#include <memory>
@@ -26,10 +25,12 @@ class AsmPrinter;
class DbgEntity;
class DbgVariable;
class DbgLabel;
+class DINode;
class DwarfCompileUnit;
class DwarfUnit;
class LexicalScope;
class MCSection;
+class MDNode;
// Data structure to hold a range for range lists.
struct RangeSpan {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
index a43929d8e8f7..a876f8ccace9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
@@ -8,7 +8,6 @@
#include "DwarfStringPool.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -33,7 +32,6 @@ DwarfStringPool::getEntryImpl(AsmPrinter &Asm, StringRef Str) {
Entry.Symbol = ShouldCreateSymbols ? Asm.createTempSymbol(Prefix) : nullptr;
NumBytes += Str.size() + 1;
- assert(NumBytes > Entry.Offset && "Unexpected overflow");
}
return *I.first;
}
@@ -58,13 +56,13 @@ void DwarfStringPool::emitStringOffsetsTableHeader(AsmPrinter &Asm,
if (getNumIndexedStrings() == 0)
return;
Asm.OutStreamer->SwitchSection(Section);
- unsigned EntrySize = 4;
- // FIXME: DWARF64
+ unsigned EntrySize = Asm.getDwarfOffsetByteSize();
// We are emitting the header for a contribution to the string offsets
// table. The header consists of an entry with the contribution's
// size (not including the size of the length field), the DWARF version and
// 2 bytes of padding.
- Asm.emitInt32(getNumIndexedStrings() * EntrySize + 4);
+ Asm.emitDwarfUnitLength(getNumIndexedStrings() * EntrySize + 4,
+ "Length of String Offsets Set");
Asm.emitInt16(Asm.getDwarfVersion());
Asm.emitInt16(0);
// Define the symbol that marks the start of the contribution. It is
@@ -120,7 +118,7 @@ void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection,
}
Asm.OutStreamer->SwitchSection(OffsetSection);
- unsigned size = 4; // FIXME: DWARF64 is 8.
+ unsigned size = Asm.getDwarfOffsetByteSize();
for (const auto &Entry : Entries)
if (UseRelativeOffsets)
Asm.emitDwarfStringOffset(Entry->getValue());
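A back-of-the-envelope sketch of the contribution length emitted by the
header above, assuming the layout described in its comment (a 2-byte version
plus 2 bytes of padding after the length field, then one offset-sized slot
per indexed string); the helper is illustrative, not LLVM API:

#include <cstdint>

uint64_t stringOffsetsContributionLength(uint64_t NumIndexedStrings,
                                         bool IsDwarf64) {
  unsigned EntrySize = IsDwarf64 ? 8 : 4;   // Asm.getDwarfOffsetByteSize().
  return NumIndexedStrings * EntrySize + 4; // +4: version and padding.
}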
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h
index c5f5637fdae3..79b5df89e338 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h
@@ -28,7 +28,7 @@ class DwarfStringPool {
StringMap<EntryTy, BumpPtrAllocator &> Pool;
StringRef Prefix;
- unsigned NumBytes = 0;
+ uint64_t NumBytes = 0;
unsigned NumIndexedStrings = 0;
bool ShouldCreateSymbols;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index ceeae14c1073..118b5fcc3bf6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -13,7 +13,6 @@
#include "DwarfUnit.h"
#include "AddressPool.h"
#include "DwarfCompileUnit.h"
-#include "DwarfDebug.h"
#include "DwarfExpression.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
@@ -84,15 +83,14 @@ unsigned DIEDwarfExpression::getTemporaryBufferSize() {
void DIEDwarfExpression::commitTemporaryBuffer() { OutDIE.takeValues(TmpDIE); }
bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
- unsigned MachineReg) {
+ llvm::Register MachineReg) {
return MachineReg == TRI.getFrameRegister(*AP.MF);
}
DwarfUnit::DwarfUnit(dwarf::Tag UnitTag, const DICompileUnit *Node,
AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU)
- : DIEUnit(A->getDwarfVersion(), A->MAI->getCodePointerSize(), UnitTag),
- CUNode(Node), Asm(A), DD(DW), DU(DWU), IndexTyDie(nullptr) {
-}
+ : DIEUnit(UnitTag), CUNode(Node), Asm(A), DD(DW), DU(DWU),
+ IndexTyDie(nullptr) {}
DwarfTypeUnit::DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU,
@@ -301,27 +299,7 @@ void DwarfUnit::addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label) {
void DwarfUnit::addSectionOffset(DIE &Die, dwarf::Attribute Attribute,
uint64_t Integer) {
- if (DD->getDwarfVersion() >= 4)
- addUInt(Die, Attribute, dwarf::DW_FORM_sec_offset, Integer);
- else
- addUInt(Die, Attribute, dwarf::DW_FORM_data4, Integer);
-}
-
-Optional<MD5::MD5Result> DwarfUnit::getMD5AsBytes(const DIFile *File) const {
- assert(File);
- if (DD->getDwarfVersion() < 5)
- return None;
- Optional<DIFile::ChecksumInfo<StringRef>> Checksum = File->getChecksum();
- if (!Checksum || Checksum->Kind != DIFile::CSK_MD5)
- return None;
-
- // Convert the string checksum to an MD5Result for the streamer.
- // The verifier validates the checksum so we assume it's okay.
- // An MD5 checksum is 16 bytes.
- std::string ChecksumString = fromHex(Checksum->Value);
- MD5::MD5Result CKMem;
- std::copy(ChecksumString.begin(), ChecksumString.end(), CKMem.Bytes.data());
- return CKMem;
+ addUInt(Die, Attribute, DD->getDwarfSectionOffsetForm(), Integer);
}
unsigned DwarfTypeUnit::getOrCreateSourceID(const DIFile *File) {
@@ -332,10 +310,9 @@ unsigned DwarfTypeUnit::getOrCreateSourceID(const DIFile *File) {
// This is a split type unit that needs a line table.
addSectionOffset(getUnitDie(), dwarf::DW_AT_stmt_list, 0);
}
- return SplitLineTable->getFile(File->getDirectory(), File->getFilename(),
- getMD5AsBytes(File),
- Asm->OutContext.getDwarfVersion(),
- File->getSource());
+ return SplitLineTable->getFile(
+ File->getDirectory(), File->getFilename(), DD->getMD5AsBytes(File),
+ Asm->OutContext.getDwarfVersion(), File->getSource());
}
void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) {
@@ -353,7 +330,7 @@ void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) {
}
addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
- addLabel(Die, dwarf::DW_FORM_udata, Sym);
+ addLabel(Die, dwarf::DW_FORM_addr, Sym);
}
void DwarfUnit::addLabelDelta(DIE &Die, dwarf::Attribute Attribute,
@@ -457,77 +434,6 @@ void DwarfUnit::addSourceLine(DIE &Die, const DIObjCProperty *Ty) {
addSourceLine(Die, Ty->getLine(), Ty->getFile());
}
-/// Return true if type encoding is unsigned.
-static bool isUnsignedDIType(DwarfDebug *DD, const DIType *Ty) {
- if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
- // FIXME: Enums without a fixed underlying type have unknown signedness
- // here, leading to incorrectly emitted constants.
- if (CTy->getTag() == dwarf::DW_TAG_enumeration_type)
- return false;
-
- // (Pieces of) aggregate types that get hacked apart by SROA may be
- // represented by a constant. Encode them as unsigned bytes.
- return true;
- }
-
- if (auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
- dwarf::Tag T = (dwarf::Tag)Ty->getTag();
- // Encode pointer constants as unsigned bytes. This is used at least for
- // null pointer constant emission.
- // FIXME: reference and rvalue_reference /probably/ shouldn't be allowed
- // here, but accept them for now due to a bug in SROA producing bogus
- // dbg.values.
- if (T == dwarf::DW_TAG_pointer_type ||
- T == dwarf::DW_TAG_ptr_to_member_type ||
- T == dwarf::DW_TAG_reference_type ||
- T == dwarf::DW_TAG_rvalue_reference_type)
- return true;
- assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type ||
- T == dwarf::DW_TAG_volatile_type ||
- T == dwarf::DW_TAG_restrict_type || T == dwarf::DW_TAG_atomic_type);
- assert(DTy->getBaseType() && "Expected valid base type");
- return isUnsignedDIType(DD, DTy->getBaseType());
- }
-
- auto *BTy = cast<DIBasicType>(Ty);
- unsigned Encoding = BTy->getEncoding();
- assert((Encoding == dwarf::DW_ATE_unsigned ||
- Encoding == dwarf::DW_ATE_unsigned_char ||
- Encoding == dwarf::DW_ATE_signed ||
- Encoding == dwarf::DW_ATE_signed_char ||
- Encoding == dwarf::DW_ATE_float || Encoding == dwarf::DW_ATE_UTF ||
- Encoding == dwarf::DW_ATE_boolean ||
- (Ty->getTag() == dwarf::DW_TAG_unspecified_type &&
- Ty->getName() == "decltype(nullptr)")) &&
- "Unsupported encoding");
- return Encoding == dwarf::DW_ATE_unsigned ||
- Encoding == dwarf::DW_ATE_unsigned_char ||
- Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean ||
- Ty->getTag() == dwarf::DW_TAG_unspecified_type;
-}
-
-void DwarfUnit::addConstantFPValue(DIE &Die, const MachineOperand &MO) {
- assert(MO.isFPImm() && "Invalid machine operand!");
- DIEBlock *Block = new (DIEValueAllocator) DIEBlock;
- APFloat FPImm = MO.getFPImm()->getValueAPF();
-
- // Get the raw data form of the floating point.
- const APInt FltVal = FPImm.bitcastToAPInt();
- const char *FltPtr = (const char *)FltVal.getRawData();
-
- int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte.
- bool LittleEndian = Asm->getDataLayout().isLittleEndian();
- int Incr = (LittleEndian ? 1 : -1);
- int Start = (LittleEndian ? 0 : NumBytes - 1);
- int Stop = (LittleEndian ? NumBytes : -1);
-
- // Output the constant to DWARF one byte at a time.
- for (; Start != Stop; Start += Incr)
- addUInt(*Block, dwarf::DW_FORM_data1, (unsigned char)0xFF & FltPtr[Start]);
-
- addBlock(Die, dwarf::DW_AT_const_value, Block);
-}
-
void DwarfUnit::addConstantFPValue(DIE &Die, const ConstantFP *CFP) {
// Pass this down to addConstantValue as an unsigned bag of bits.
addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), true);
@@ -538,15 +444,8 @@ void DwarfUnit::addConstantValue(DIE &Die, const ConstantInt *CI,
addConstantValue(Die, CI->getValue(), Ty);
}
-void DwarfUnit::addConstantValue(DIE &Die, const MachineOperand &MO,
- const DIType *Ty) {
- assert(MO.isImm() && "Invalid machine operand!");
-
- addConstantValue(Die, isUnsignedDIType(DD, Ty), MO.getImm());
-}
-
void DwarfUnit::addConstantValue(DIE &Die, uint64_t Val, const DIType *Ty) {
- addConstantValue(Die, isUnsignedDIType(DD, Ty), Val);
+ addConstantValue(Die, DD->isUnsignedDIType(Ty), Val);
}
void DwarfUnit::addConstantValue(DIE &Die, bool Unsigned, uint64_t Val) {
@@ -557,7 +456,7 @@ void DwarfUnit::addConstantValue(DIE &Die, bool Unsigned, uint64_t Val) {
}
void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, const DIType *Ty) {
- addConstantValue(Die, Val, isUnsignedDIType(DD, Ty));
+ addConstantValue(Die, Val, DD->isUnsignedDIType(Ty));
}
void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, bool Unsigned) {
@@ -654,6 +553,8 @@ DIE *DwarfUnit::createTypeDIE(const DIScope *Context, DIE &ContextDIE,
if (auto *BT = dyn_cast<DIBasicType>(Ty))
constructTypeDIE(TyDIE, BT);
+ else if (auto *ST = dyn_cast<DIStringType>(Ty))
+ constructTypeDIE(TyDIE, ST);
else if (auto *STy = dyn_cast<DISubroutineType>(Ty))
constructTypeDIE(TyDIE, STy);
else if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
@@ -772,8 +673,9 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIBasicType *BTy) {
if (BTy->getTag() == dwarf::DW_TAG_unspecified_type)
return;
- addUInt(Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
- BTy->getEncoding());
+ if (BTy->getTag() != dwarf::DW_TAG_string_type)
+ addUInt(Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ BTy->getEncoding());
uint64_t Size = BTy->getSizeInBits() >> 3;
addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
@@ -784,6 +686,37 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIBasicType *BTy) {
addUInt(Buffer, dwarf::DW_AT_endianity, None, dwarf::DW_END_little);
}
+void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIStringType *STy) {
+ // Get core information.
+ StringRef Name = STy->getName();
+ // Add name if not anonymous or intermediate type.
+ if (!Name.empty())
+ addString(Buffer, dwarf::DW_AT_name, Name);
+
+ if (DIVariable *Var = STy->getStringLength()) {
+ if (auto *VarDIE = getDIE(Var))
+ addDIEEntry(Buffer, dwarf::DW_AT_string_length, *VarDIE);
+ } else if (DIExpression *Expr = STy->getStringLengthExp()) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
+    // This describes the memory location of the length of a Fortran
+    // deferred-length string, so lock it down as such.
+ DwarfExpr.setMemoryLocationKind();
+ DwarfExpr.addExpression(Expr);
+ addBlock(Buffer, dwarf::DW_AT_string_length, DwarfExpr.finalize());
+ } else {
+ uint64_t Size = STy->getSizeInBits() >> 3;
+ addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
+ }
+
+ if (STy->getEncoding()) {
+ // For eventual Unicode support.
+ addUInt(Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ STy->getEncoding());
+ }
+}
+
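The string-length emission above is a three-way choice: a DIE reference when
the length lives in a variable, a DWARF expression when it must be computed,
and a plain DW_AT_byte_size otherwise. A simplified stand-in record makes the
dispatch explicit; this struct is a sketch, not the real DIStringType API:

#include <cstdint>
#include <cstdio>

struct StringTypeSketch {
  bool HasLengthVar;  // Corresponds to STy->getStringLength().
  bool HasLengthExpr; // Corresponds to STy->getStringLengthExp().
  uint64_t SizeInBits;
};

void describeStringLength(const StringTypeSketch &STy) {
  if (STy.HasLengthVar)
    std::puts("DW_AT_string_length -> DIE ref to the length variable");
  else if (STy.HasLengthExpr)
    std::puts("DW_AT_string_length -> DWARF expression (memory location)");
  else
    std::printf("DW_AT_byte_size = %llu\n",
                (unsigned long long)(STy.SizeInBits >> 3));
}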
void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
// Get core information.
StringRef Name = DTy->getName();
@@ -910,6 +843,11 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
}
}
+  // Add template parameters to class, structure, or union types.
+ if (Tag == dwarf::DW_TAG_class_type ||
+ Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type)
+ addTemplateParams(Buffer, CTy->getTemplateParams());
+
// Add elements to structure type.
DINodeArray Elements = CTy->getElements();
for (const auto *Element : Elements) {
@@ -929,7 +867,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
DIE &Variant = createAndAddDIE(dwarf::DW_TAG_variant, Buffer);
if (const ConstantInt *CI =
dyn_cast_or_null<ConstantInt>(DDTy->getDiscriminantValue())) {
- if (isUnsignedDIType(DD, Discriminator->getBaseType()))
+ if (DD->isUnsignedDIType(Discriminator->getBaseType()))
addUInt(Variant, dwarf::DW_AT_discr_value, None, CI->getZExtValue());
else
addSInt(Variant, dwarf::DW_AT_discr_value, None, CI->getSExtValue());
@@ -979,12 +917,6 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
if (CTy->isObjcClassComplete())
addFlag(Buffer, dwarf::DW_AT_APPLE_objc_complete_type);
- // Add template parameters to a class, structure or union types.
- // FIXME: The support isn't in the metadata for this yet.
- if (Tag == dwarf::DW_TAG_class_type ||
- Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type)
- addTemplateParams(Buffer, CTy->getTemplateParams());
-
// Add the type's non-standard calling convention.
uint8_t CC = 0;
if (CTy->isTypePassByValue())
@@ -1008,8 +940,10 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type ||
Tag == dwarf::DW_TAG_union_type) {
// Add size if non-zero (derived types might be zero-sized.)
+ // Ignore the size if it's a non-enum forward decl.
// TODO: Do we care about size for enum forward declarations?
- if (Size)
+ if (Size &&
+ (!CTy->isForwardDecl() || Tag == dwarf::DW_TAG_enumeration_type))
addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
else if (!CTy->isForwardDecl())
// Add zero size if it is not a forward declaration.
@@ -1133,6 +1067,8 @@ DIE *DwarfUnit::getOrCreateModule(const DIModule *M) {
getOrCreateSourceID(M->getFile()));
if (M->getLineNo())
addUInt(MDie, dwarf::DW_AT_decl_line, None, M->getLineNo());
+ if (M->getIsDecl())
+ addFlag(MDie, dwarf::DW_AT_declaration);
return &MDie;
}
@@ -1354,7 +1290,7 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR,
if (auto *CI = SR->getCount().dyn_cast<ConstantInt*>())
Count = CI->getSExtValue();
- auto addBoundTypeEntry = [&](dwarf::Attribute Attr,
+ auto AddBoundTypeEntry = [&](dwarf::Attribute Attr,
DISubrange::BoundType Bound) -> void {
if (auto *BV = Bound.dyn_cast<DIVariable *>()) {
if (auto *VarDIE = getDIE(BV))
@@ -1372,7 +1308,7 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR,
}
};
- addBoundTypeEntry(dwarf::DW_AT_lower_bound, SR->getLowerBound());
+ AddBoundTypeEntry(dwarf::DW_AT_lower_bound, SR->getLowerBound());
if (auto *CV = SR->getCount().dyn_cast<DIVariable*>()) {
if (auto *CountVarDIE = getDIE(CV))
@@ -1380,9 +1316,45 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR,
} else if (Count != -1)
addUInt(DW_Subrange, dwarf::DW_AT_count, None, Count);
- addBoundTypeEntry(dwarf::DW_AT_upper_bound, SR->getUpperBound());
+ AddBoundTypeEntry(dwarf::DW_AT_upper_bound, SR->getUpperBound());
- addBoundTypeEntry(dwarf::DW_AT_byte_stride, SR->getStride());
+ AddBoundTypeEntry(dwarf::DW_AT_byte_stride, SR->getStride());
+}
+
+void DwarfUnit::constructGenericSubrangeDIE(DIE &Buffer,
+ const DIGenericSubrange *GSR,
+ DIE *IndexTy) {
+ DIE &DwGenericSubrange =
+ createAndAddDIE(dwarf::DW_TAG_generic_subrange, Buffer);
+ addDIEEntry(DwGenericSubrange, dwarf::DW_AT_type, *IndexTy);
+
+ int64_t DefaultLowerBound = getDefaultLowerBound();
+
+ auto AddBoundTypeEntry = [&](dwarf::Attribute Attr,
+ DIGenericSubrange::BoundType Bound) -> void {
+ if (auto *BV = Bound.dyn_cast<DIVariable *>()) {
+ if (auto *VarDIE = getDIE(BV))
+ addDIEEntry(DwGenericSubrange, Attr, *VarDIE);
+ } else if (auto *BE = Bound.dyn_cast<DIExpression *>()) {
+ if (BE->isSignedConstant()) {
+ if (Attr != dwarf::DW_AT_lower_bound || DefaultLowerBound == -1 ||
+ static_cast<int64_t>(BE->getElement(1)) != DefaultLowerBound)
+ addSInt(DwGenericSubrange, Attr, dwarf::DW_FORM_sdata,
+ BE->getElement(1));
+ } else {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
+ DwarfExpr.setMemoryLocationKind();
+ DwarfExpr.addExpression(BE);
+ addBlock(DwGenericSubrange, Attr, DwarfExpr.finalize());
+ }
+ }
+ };
+
+ AddBoundTypeEntry(dwarf::DW_AT_lower_bound, GSR->getLowerBound());
+ AddBoundTypeEntry(dwarf::DW_AT_count, GSR->getCount());
+ AddBoundTypeEntry(dwarf::DW_AT_upper_bound, GSR->getUpperBound());
+ AddBoundTypeEntry(dwarf::DW_AT_byte_stride, GSR->getStride());
}
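The constant case in AddBoundTypeEntry suppresses a lower bound equal to the
language's default (1 for Fortran), matching debugger expectations. A sketch
of just that rule, assuming -1 encodes "no default known" as in
getDefaultLowerBound():

#include <cstdint>

bool shouldEmitLowerBound(int64_t Bound, int64_t DefaultLowerBound) {
  return DefaultLowerBound == -1 || Bound != DefaultLowerBound;
}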
DIE *DwarfUnit::getIndexTyDie() {
@@ -1447,6 +1419,39 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
addBlock(Buffer, dwarf::DW_AT_data_location, DwarfExpr.finalize());
}
+ if (DIVariable *Var = CTy->getAssociated()) {
+ if (auto *VarDIE = getDIE(Var))
+ addDIEEntry(Buffer, dwarf::DW_AT_associated, *VarDIE);
+ } else if (DIExpression *Expr = CTy->getAssociatedExp()) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
+ DwarfExpr.setMemoryLocationKind();
+ DwarfExpr.addExpression(Expr);
+ addBlock(Buffer, dwarf::DW_AT_associated, DwarfExpr.finalize());
+ }
+
+ if (DIVariable *Var = CTy->getAllocated()) {
+ if (auto *VarDIE = getDIE(Var))
+ addDIEEntry(Buffer, dwarf::DW_AT_allocated, *VarDIE);
+ } else if (DIExpression *Expr = CTy->getAllocatedExp()) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
+ DwarfExpr.setMemoryLocationKind();
+ DwarfExpr.addExpression(Expr);
+ addBlock(Buffer, dwarf::DW_AT_allocated, DwarfExpr.finalize());
+ }
+
+ if (auto *RankConst = CTy->getRankConst()) {
+ addSInt(Buffer, dwarf::DW_AT_rank, dwarf::DW_FORM_sdata,
+ RankConst->getSExtValue());
+ } else if (auto *RankExpr = CTy->getRankExp()) {
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
+ DwarfExpr.setMemoryLocationKind();
+ DwarfExpr.addExpression(RankExpr);
+ addBlock(Buffer, dwarf::DW_AT_rank, DwarfExpr.finalize());
+ }
+
// Emit the element type.
addType(Buffer, CTy->getBaseType());
@@ -1459,15 +1464,19 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
DINodeArray Elements = CTy->getElements();
for (unsigned i = 0, N = Elements.size(); i < N; ++i) {
// FIXME: Should this really be such a loose cast?
- if (auto *Element = dyn_cast_or_null<DINode>(Elements[i]))
+ if (auto *Element = dyn_cast_or_null<DINode>(Elements[i])) {
if (Element->getTag() == dwarf::DW_TAG_subrange_type)
constructSubrangeDIE(Buffer, cast<DISubrange>(Element), IdxTy);
+ else if (Element->getTag() == dwarf::DW_TAG_generic_subrange)
+ constructGenericSubrangeDIE(Buffer, cast<DIGenericSubrange>(Element),
+ IdxTy);
+ }
}
}
void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
const DIType *DTy = CTy->getBaseType();
- bool IsUnsigned = DTy && isUnsignedDIType(DD, DTy);
+ bool IsUnsigned = DTy && DD->isUnsignedDIType(DTy);
if (DTy) {
if (DD->getDwarfVersion() >= 3)
addType(Buffer, DTy);
@@ -1666,15 +1675,15 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) {
void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) {
// Emit size of content not including length itself
- Asm->OutStreamer->AddComment("Length of Unit");
if (!DD->useSectionsAsReferences()) {
StringRef Prefix = isDwoUnit() ? "debug_info_dwo_" : "debug_info_";
MCSymbol *BeginLabel = Asm->createTempSymbol(Prefix + "start");
EndLabel = Asm->createTempSymbol(Prefix + "end");
- Asm->emitLabelDifference(EndLabel, BeginLabel, 4);
+ Asm->emitDwarfUnitLength(EndLabel, BeginLabel, "Length of Unit");
Asm->OutStreamer->emitLabel(BeginLabel);
} else
- Asm->emitInt32(getHeaderSize() + getUnitDie().getSize());
+ Asm->emitDwarfUnitLength(getHeaderSize() + getUnitDie().getSize(),
+ "Length of Unit");
Asm->OutStreamer->AddComment("DWARF version number");
unsigned Version = DD->getDwarfVersion();
@@ -1694,7 +1703,7 @@ void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) {
Asm->OutStreamer->AddComment("Offset Into Abbrev. Section");
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
if (UseOffsets)
- Asm->emitInt32(0);
+ Asm->emitDwarfLengthOrOffset(0);
else
Asm->emitDwarfSymbolReference(
TLOF.getDwarfAbbrevSection()->getBeginSymbol(), false);
@@ -1713,16 +1722,14 @@ void DwarfTypeUnit::emitHeader(bool UseOffsets) {
Asm->OutStreamer->emitIntValue(TypeSignature, sizeof(TypeSignature));
Asm->OutStreamer->AddComment("Type DIE Offset");
// In a skeleton type unit there is no type DIE so emit a zero offset.
- Asm->OutStreamer->emitIntValue(Ty ? Ty->getOffset() : 0,
- sizeof(Ty->getOffset()));
+ Asm->emitDwarfLengthOrOffset(Ty ? Ty->getOffset() : 0);
}
DIE::value_iterator
DwarfUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Hi, const MCSymbol *Lo) {
return Die.addValue(DIEValueAllocator, Attribute,
- DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
- : dwarf::DW_FORM_data4,
+ DD->getDwarfSectionOffsetForm(),
new (DIEValueAllocator) DIEDelta(Hi, Lo));
}
@@ -1730,10 +1737,7 @@ DIE::value_iterator
DwarfUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Label, const MCSymbol *Sec) {
if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
- return addLabel(Die, Attribute,
- DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
- : dwarf::DW_FORM_data4,
- Label);
+ return addLabel(Die, Attribute, DD->getDwarfSectionOffsetForm(), Label);
return addSectionDelta(Die, Attribute, Label, Sec);
}
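Taken together, the header changes in this file track the DWARF64 field
widths: the post-length header consists of a 2-byte version, an offset-sized
abbrev offset, a 1-byte address size, and (from v5) a unit type byte, with
field order varying by version. A minimal sketch matching getHeaderSize() in
DwarfUnit.h, under those assumed widths:

#include <cstdint>

unsigned unitHeaderSize(uint16_t DwarfVersion, bool IsDwarf64) {
  unsigned OffsetSize = IsDwarf64 ? 8 : 4;
  return 2 + OffsetSize + 1 + (DwarfVersion >= 5 ? 1 : 0);
}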
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 34f3a34ed336..5c643760fd56 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -16,22 +16,19 @@
#include "DwarfDebug.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DIE.h"
-#include "llvm/IR/DIBuilder.h"
-#include "llvm/IR/DebugInfo.h"
-#include "llvm/MC/MCDwarf.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSection.h"
+#include <string>
namespace llvm {
-class MachineOperand;
-class ConstantInt;
class ConstantFP;
+class ConstantInt;
class DbgVariable;
class DwarfCompileUnit;
+class MachineOperand;
+class MCDwarfDwoLineTable;
+class MCSymbol;
//===----------------------------------------------------------------------===//
/// This dwarf writer support class manages information associated with a
@@ -77,7 +74,6 @@ protected:
bool applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie);
- bool shareAcrossDWOCUs() const;
bool isShareableAcrossCUs(const DINode *D) const;
public:
@@ -86,8 +82,7 @@ public:
MCSymbol *getEndLabel() const { return EndLabel; }
uint16_t getLanguage() const { return CUNode->getSourceLanguage(); }
const DICompileUnit *getCUNode() const { return CUNode; }
-
- uint16_t getDwarfVersion() const { return DD->getDwarfVersion(); }
+ DwarfDebug &getDwarfDebug() const { return *DD; }
/// Return true if this compile unit has something to write out.
bool hasContent() const { return getUnitDie().hasChildren(); }
@@ -195,7 +190,6 @@ public:
void addSourceLine(DIE &Die, const DIObjCProperty *Ty);
/// Add constant value entry in variable DIE.
- void addConstantValue(DIE &Die, const MachineOperand &MO, const DIType *Ty);
void addConstantValue(DIE &Die, const ConstantInt *CI, const DIType *Ty);
void addConstantValue(DIE &Die, const APInt &Val, const DIType *Ty);
void addConstantValue(DIE &Die, const APInt &Val, bool Unsigned);
@@ -203,7 +197,6 @@ public:
void addConstantValue(DIE &Die, bool Unsigned, uint64_t Val);
/// Add constant value entry in variable DIE.
- void addConstantFPValue(DIE &Die, const MachineOperand &MO);
void addConstantFPValue(DIE &Die, const ConstantFP *CFP);
/// Add a linkage name, if it isn't empty.
@@ -255,9 +248,9 @@ public:
/// Compute the size of a header for this unit, not including the initial
/// length field.
virtual unsigned getHeaderSize() const {
- return sizeof(int16_t) + // DWARF version number
- sizeof(int32_t) + // Offset Into Abbrev. Section
- sizeof(int8_t) + // Pointer Size (in bytes)
+ return sizeof(int16_t) + // DWARF version number
+ Asm->getDwarfOffsetByteSize() + // Offset Into Abbrev. Section
+ sizeof(int8_t) + // Pointer Size (in bytes)
(DD->getDwarfVersion() >= 5 ? sizeof(int8_t)
: 0); // DWARF v5 unit type
}
@@ -284,10 +277,6 @@ public:
const MCSymbol *Label,
const MCSymbol *Sec);
- /// If the \p File has an MD5 checksum, return it as an MD5Result
- /// allocated in the MCContext.
- Optional<MD5::MD5Result> getMD5AsBytes(const DIFile *File) const;
-
/// Get context owner's DIE.
DIE *createTypeDIE(const DICompositeType *Ty);
@@ -306,9 +295,12 @@ protected:
private:
void constructTypeDIE(DIE &Buffer, const DIBasicType *BTy);
+ void constructTypeDIE(DIE &Buffer, const DIStringType *BTy);
void constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy);
void constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy);
void constructSubrangeDIE(DIE &Buffer, const DISubrange *SR, DIE *IndexTy);
+ void constructGenericSubrangeDIE(DIE &Buffer, const DIGenericSubrange *SR,
+ DIE *IndexTy);
void constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy);
void constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy);
DIE &constructMemberDIE(DIE &Buffer, const DIDerivedType *DT);
@@ -361,7 +353,7 @@ public:
void emitHeader(bool UseOffsets) override;
unsigned getHeaderSize() const override {
return DwarfUnit::getHeaderSize() + sizeof(uint64_t) + // Type Signature
- sizeof(uint32_t); // Type DIE Offset
+ Asm->getDwarfOffsetByteSize(); // Type DIE Offset
}
void addGlobalName(StringRef Name, const DIE &Die,
const DIScope *Context) override;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 99ee4567fa58..2ffe8a7b0469 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -44,15 +44,9 @@ EHStreamer::~EHStreamer() = default;
unsigned EHStreamer::sharedTypeIDs(const LandingPadInfo *L,
const LandingPadInfo *R) {
const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
- unsigned LSize = LIds.size(), RSize = RIds.size();
- unsigned MinSize = LSize < RSize ? LSize : RSize;
- unsigned Count = 0;
-
- for (; Count != MinSize; ++Count)
- if (LIds[Count] != RIds[Count])
- return Count;
-
- return Count;
+ return std::mismatch(LIds.begin(), LIds.end(), RIds.begin(), RIds.end())
+ .first -
+ LIds.begin();
}
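The std::mismatch rewrite computes the length of the common prefix of the two
type-id lists, including the short-list case the old loop handled via
MinSize. A standalone equivalence check:

#include <algorithm>
#include <cassert>
#include <vector>

unsigned commonPrefixLength(const std::vector<int> &L,
                            const std::vector<int> &R) {
  return std::mismatch(L.begin(), L.end(), R.begin(), R.end()).first -
         L.begin();
}

int main() {
  assert(commonPrefixLength({1, 2, 3}, {1, 2, 4}) == 2);
  assert(commonPrefixLength({1, 2}, {1, 2, 9}) == 2); // Shorter list wins.
  assert(commonPrefixLength({}, {7}) == 0);
  return 0;
}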
/// Compute the actions table and gather the first action index for each landing
@@ -220,15 +214,30 @@ void EHStreamer::computePadMap(
/// the landing pad and the action. Calls marked 'nounwind' have no entry and
/// must not be contained in the try-range of any entry - they form gaps in the
/// table. Entries must be ordered by try-range address.
-void EHStreamer::
-computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
- const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
- const SmallVectorImpl<unsigned> &FirstActions) {
+///
+/// Call-sites are split into one or more call-site ranges associated with
+/// different sections of the function.
+///
+/// - Without -basic-block-sections, all call-sites are grouped into one
+/// call-site-range corresponding to the function section.
+///
+/// - With -basic-block-sections, one call-site range is created for each
+///   section, with its FragmentBeginLabel and FragmentEndLabel respectively
+///   set to the beginning and ending of the corresponding section and its
+///   ExceptionLabel set to the exception symbol dedicated for this section.
+///   Later, one LSDA header will be emitted for each call-site range with
+///   its call-sites following. The action table and type info table will be
+///   shared across all ranges.
+void EHStreamer::computeCallSiteTable(
+ SmallVectorImpl<CallSiteEntry> &CallSites,
+ SmallVectorImpl<CallSiteRange> &CallSiteRanges,
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ const SmallVectorImpl<unsigned> &FirstActions) {
RangeMapType PadMap;
computePadMap(LandingPads, PadMap);
// The end label of the previous invoke or nounwind try-range.
- MCSymbol *LastLabel = nullptr;
+ MCSymbol *LastLabel = Asm->getFunctionBegin();
// Whether there is a potentially throwing instruction (currently this means
// an ordinary call) between the end of the previous try-range and now.
@@ -241,6 +250,21 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
// Visit all instructions in order of address.
for (const auto &MBB : *Asm->MF) {
+ if (&MBB == &Asm->MF->front() || MBB.isBeginSection()) {
+ // We start a call-site range upon function entry and at the beginning of
+ // every basic block section.
+ CallSiteRanges.push_back(
+ {Asm->MBBSectionRanges[MBB.getSectionIDNum()].BeginLabel,
+ Asm->MBBSectionRanges[MBB.getSectionIDNum()].EndLabel,
+ Asm->getMBBExceptionSym(MBB), CallSites.size()});
+ PreviousIsInvoke = false;
+ SawPotentiallyThrowing = false;
+ LastLabel = nullptr;
+ }
+
+ if (MBB.isEHPad())
+ CallSiteRanges.back().IsLPRange = true;
+
for (const auto &MI : MBB) {
if (!MI.isEHLabel()) {
if (MI.isCall())
@@ -264,13 +288,14 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] &&
"Inconsistent landing pad map!");
- // For Dwarf exception handling (SjLj handling doesn't use this). If some
- // instruction between the previous try-range and this one may throw,
- // create a call-site entry with no landing pad for the region between the
- // try-ranges.
- if (SawPotentiallyThrowing && Asm->MAI->usesCFIForEH()) {
- CallSiteEntry Site = { LastLabel, BeginLabel, nullptr, 0 };
- CallSites.push_back(Site);
+ // For Dwarf and AIX exception handling (SjLj handling doesn't use this).
+ // If some instruction between the previous try-range and this one may
+ // throw, create a call-site entry with no landing pad for the region
+ // between the try-ranges.
+ if (SawPotentiallyThrowing &&
+ (Asm->MAI->usesCFIForEH() ||
+ Asm->MAI->getExceptionHandlingType() == ExceptionHandling::AIX)) {
+ CallSites.push_back({LastLabel, BeginLabel, nullptr, 0});
PreviousIsInvoke = false;
}
@@ -313,14 +338,21 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
PreviousIsInvoke = true;
}
}
- }
- // If some instruction between the previous try-range and the end of the
- // function may throw, create a call-site entry with no landing pad for the
- // region following the try-range.
- if (SawPotentiallyThrowing && !IsSJLJ) {
- CallSiteEntry Site = { LastLabel, nullptr, nullptr, 0 };
- CallSites.push_back(Site);
+ // We end the call-site range upon function exit and at the end of every
+ // basic block section.
+ if (&MBB == &Asm->MF->back() || MBB.isEndSection()) {
+ // If some instruction between the previous try-range and the end of the
+ // function may throw, create a call-site entry with no landing pad for
+ // the region following the try-range.
+ if (SawPotentiallyThrowing && !IsSJLJ) {
+ CallSiteEntry Site = {LastLabel, CallSiteRanges.back().FragmentEndLabel,
+ nullptr, 0};
+ CallSites.push_back(Site);
+ SawPotentiallyThrowing = false;
+ }
+ CallSiteRanges.back().CallSiteEndIdx = CallSites.size();
+ }
}
}
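For reference while reading the loop above, this is the assumed shape of the
CallSiteRange record (its real definition lives in EHStreamer.h; MCSymbol
pointers are reduced to void* here): each function fragment gets one range
holding its bounds, its dedicated exception symbol, and a half-open slice
[CallSiteBeginIdx, CallSiteEndIdx) of the shared call-site vector.

#include <cstddef>

struct CallSiteRangeSketch {
  const void *FragmentBeginLabel = nullptr; // MCSymbol* in the real code.
  const void *FragmentEndLabel = nullptr;
  const void *ExceptionLabel = nullptr;
  std::size_t CallSiteBeginIdx = 0;
  std::size_t CallSiteEndIdx = 0; // Filled in when the fragment ends.
  bool IsLPRange = false; // True for the fragment holding the landing pads.
};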
@@ -371,19 +403,25 @@ MCSymbol *EHStreamer::emitExceptionTable() {
SmallVector<unsigned, 64> FirstActions;
computeActionsTable(LandingPads, Actions, FirstActions);
- // Compute the call-site table.
+  // Compute the call-site table and call-site ranges. Normally, there is
+  // only one call-site range, which covers the whole function. With
+  // -basic-block-sections, there is one call-site range per basic block
+  // section.
SmallVector<CallSiteEntry, 64> CallSites;
- computeCallSiteTable(CallSites, LandingPads, FirstActions);
+ SmallVector<CallSiteRange, 4> CallSiteRanges;
+ computeCallSiteTable(CallSites, CallSiteRanges, LandingPads, FirstActions);
bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
bool IsWasm = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Wasm;
+ bool HasLEB128Directives = Asm->MAI->hasLEB128Directives();
unsigned CallSiteEncoding =
IsSJLJ ? static_cast<unsigned>(dwarf::DW_EH_PE_udata4) :
Asm->getObjFileLowering().getCallSiteEncoding();
bool HaveTTData = !TypeInfos.empty() || !FilterIds.empty();
// Type infos.
- MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection();
+ MCSection *LSDASection =
+ Asm->getObjFileLowering().getSectionForLSDA(MF->getFunction(), Asm->TM);
unsigned TTypeEncoding;
if (!HaveTTData) {
@@ -433,35 +471,122 @@ MCSymbol *EHStreamer::emitExceptionTable() {
Asm->OutContext.getOrCreateSymbol(Twine("GCC_except_table")+
Twine(Asm->getFunctionNumber()));
Asm->OutStreamer->emitLabel(GCCETSym);
- Asm->OutStreamer->emitLabel(Asm->getCurExceptionSym());
-
- // Emit the LSDA header.
- Asm->emitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");
- Asm->emitEncodingByte(TTypeEncoding, "@TType");
+ MCSymbol *CstEndLabel = Asm->createTempSymbol(
+ CallSiteRanges.size() > 1 ? "action_table_base" : "cst_end");
MCSymbol *TTBaseLabel = nullptr;
- if (HaveTTData) {
- // N.B.: There is a dependency loop between the size of the TTBase uleb128
- // here and the amount of padding before the aligned type table. The
- // assembler must sometimes pad this uleb128 or insert extra padding before
- // the type table. See PR35809 or GNU as bug 4029.
- MCSymbol *TTBaseRefLabel = Asm->createTempSymbol("ttbaseref");
+ if (HaveTTData)
TTBaseLabel = Asm->createTempSymbol("ttbase");
- Asm->emitLabelDifferenceAsULEB128(TTBaseLabel, TTBaseRefLabel);
- Asm->OutStreamer->emitLabel(TTBaseRefLabel);
- }
- bool VerboseAsm = Asm->OutStreamer->isVerboseAsm();
+ const bool VerboseAsm = Asm->OutStreamer->isVerboseAsm();
+
+ // Helper for emitting references (offsets) for type table and the end of the
+ // call-site table (which marks the beginning of the action table).
+ // * For Itanium, these references will be emitted for every callsite range.
+ // * For SJLJ and Wasm, they will be emitted only once in the LSDA header.
+ auto EmitTypeTableRefAndCallSiteTableEndRef = [&]() {
+ Asm->emitEncodingByte(TTypeEncoding, "@TType");
+ if (HaveTTData) {
+ // N.B.: There is a dependency loop between the size of the TTBase uleb128
+ // here and the amount of padding before the aligned type table. The
+ // assembler must sometimes pad this uleb128 or insert extra padding
+ // before the type table. See PR35809 or GNU as bug 4029.
+ MCSymbol *TTBaseRefLabel = Asm->createTempSymbol("ttbaseref");
+ Asm->emitLabelDifferenceAsULEB128(TTBaseLabel, TTBaseRefLabel);
+ Asm->OutStreamer->emitLabel(TTBaseRefLabel);
+ }
+
+ // The Action table follows the call-site table. So we emit the
+ // label difference from here (start of the call-site table for SJLJ and
+ // Wasm, and start of a call-site range for Itanium) to the end of the
+ // whole call-site table (end of the last call-site range for Itanium).
+ MCSymbol *CstBeginLabel = Asm->createTempSymbol("cst_begin");
+ Asm->emitEncodingByte(CallSiteEncoding, "Call site");
+ Asm->emitLabelDifferenceAsULEB128(CstEndLabel, CstBeginLabel);
+ Asm->OutStreamer->emitLabel(CstBeginLabel);
+ };
+
+ // An alternative path to EmitTypeTableRefAndCallSiteTableEndRef.
+  // Some system assemblers do not accept the form
+  // `.uleb128 label2 - label1`, so the label difference cannot be emitted
+  // symbolically. Instead, the LSDA size and the call-site table size are
+  // computed by hand here.
+ auto EmitTypeTableOffsetAndCallSiteTableOffset = [&]() {
+ assert(CallSiteEncoding == dwarf::DW_EH_PE_udata4 && !HasLEB128Directives &&
+ "Targets supporting .uleb128 do not need to take this path.");
+ if (CallSiteRanges.size() > 1)
+ report_fatal_error(
+ "-fbasic-block-sections is not yet supported on "
+ "platforms that do not have general LEB128 directive support.");
+
+ uint64_t CallSiteTableSize = 0;
+ const CallSiteRange &CSRange = CallSiteRanges.back();
+ for (size_t CallSiteIdx = CSRange.CallSiteBeginIdx;
+ CallSiteIdx < CSRange.CallSiteEndIdx; ++CallSiteIdx) {
+ const CallSiteEntry &S = CallSites[CallSiteIdx];
+ // Each call site entry consists of 3 udata4 fields (12 bytes) and
+ // 1 ULEB128 field.
+ CallSiteTableSize += 12 + getULEB128Size(S.Action);
+ assert(isUInt<32>(CallSiteTableSize) && "CallSiteTableSize overflows.");
+ }
+
+ Asm->emitEncodingByte(TTypeEncoding, "@TType");
+ if (HaveTTData) {
+ const unsigned ByteSizeOfCallSiteOffset =
+ getULEB128Size(CallSiteTableSize);
+ uint64_t ActionTableSize = 0;
+ for (const ActionEntry &Action : Actions) {
+ // Each action entry consists of two SLEB128 fields.
+ ActionTableSize += getSLEB128Size(Action.ValueForTypeID) +
+ getSLEB128Size(Action.NextAction);
+ assert(isUInt<32>(ActionTableSize) && "ActionTableSize overflows.");
+ }
+
+ const unsigned TypeInfoSize =
+ Asm->GetSizeOfEncodedValue(TTypeEncoding) * MF->getTypeInfos().size();
+
+ const uint64_t LSDASizeBeforeAlign =
+ 1 // Call site encoding byte.
+ + ByteSizeOfCallSiteOffset // ULEB128 encoding of CallSiteTableSize.
+ + CallSiteTableSize // Call site table content.
+ + ActionTableSize; // Action table content.
+
+ const uint64_t LSDASizeWithoutAlign = LSDASizeBeforeAlign + TypeInfoSize;
+ const unsigned ByteSizeOfLSDAWithoutAlign =
+ getULEB128Size(LSDASizeWithoutAlign);
+ const uint64_t DisplacementBeforeAlign =
+ 2 // LPStartEncoding and TypeTableEncoding.
+ + ByteSizeOfLSDAWithoutAlign + LSDASizeBeforeAlign;
+
+ // The type info area starts with 4 byte alignment.
+ const unsigned NeedAlignVal = (4 - DisplacementBeforeAlign % 4) % 4;
+ uint64_t LSDASizeWithAlign = LSDASizeWithoutAlign + NeedAlignVal;
+ const unsigned ByteSizeOfLSDAWithAlign =
+ getULEB128Size(LSDASizeWithAlign);
+
+      // LSDASizeWithAlign may need one byte less padding: when encoding
+      // the aligned size takes one more ULEB128 byte than the unaligned
+      // size did, that extra length byte itself shifts the type infos by
+      // one.
+ if (ByteSizeOfLSDAWithAlign > ByteSizeOfLSDAWithoutAlign)
+ LSDASizeWithAlign -= 1;
+
+ Asm->OutStreamer->emitULEB128IntValue(LSDASizeWithAlign,
+ ByteSizeOfLSDAWithAlign);
+ }
- // Emit the landing pad call site table.
- MCSymbol *CstBeginLabel = Asm->createTempSymbol("cst_begin");
- MCSymbol *CstEndLabel = Asm->createTempSymbol("cst_end");
- Asm->emitEncodingByte(CallSiteEncoding, "Call site");
- Asm->emitLabelDifferenceAsULEB128(CstEndLabel, CstBeginLabel);
- Asm->OutStreamer->emitLabel(CstBeginLabel);
+ Asm->emitEncodingByte(CallSiteEncoding, "Call site");
+ Asm->OutStreamer->emitULEB128IntValue(CallSiteTableSize);
+ };
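The alignment arithmetic above rewards a worked sketch: the ULEB128 size of a
value grows by one byte exactly when it crosses a 7-bit-group boundary, and
when the padded total crosses such a boundary the longer length field itself
displaces the type infos by one, so one padding byte is dropped. A standalone
version of the same computation:

#include <cstdint>

unsigned ulebSize(uint64_t V) { // Mirrors getULEB128Size().
  unsigned N = 0;
  do { ++N; V >>= 7; } while (V);
  return N;
}

uint64_t alignedLSDASize(uint64_t SizeBeforeAlign, uint64_t TypeInfoSize) {
  uint64_t SizeWithoutAlign = SizeBeforeAlign + TypeInfoSize;
  // 2 covers the LPStart and TType encoding bytes, as above.
  uint64_t Disp = 2 + ulebSize(SizeWithoutAlign) + SizeBeforeAlign;
  uint64_t Pad = (4 - Disp % 4) % 4; // Type infos are 4-byte aligned.
  uint64_t SizeWithAlign = SizeWithoutAlign + Pad;
  if (ulebSize(SizeWithAlign) > ulebSize(SizeWithoutAlign))
    SizeWithAlign -= 1; // The longer length field supplies one pad byte.
  return SizeWithAlign;
}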
// SjLj / Wasm Exception handling
if (IsSJLJ || IsWasm) {
+ Asm->OutStreamer->emitLabel(Asm->getMBBExceptionSym(Asm->MF->front()));
+
+    // Emit the LSDA header.
+ Asm->emitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");
+ EmitTypeTableRefAndCallSiteTableEndRef();
+
unsigned idx = 0;
for (SmallVectorImpl<CallSiteEntry>::const_iterator
I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) {
@@ -486,6 +611,7 @@ MCSymbol *EHStreamer::emitExceptionTable() {
}
Asm->emitULEB128(S.Action);
}
+ Asm->OutStreamer->emitLabel(CstEndLabel);
} else {
// Itanium LSDA exception handling
@@ -507,57 +633,127 @@ MCSymbol *EHStreamer::emitExceptionTable() {
// A missing entry in the call-site table indicates that a call is not
// supposed to throw.
+ assert(CallSiteRanges.size() != 0 && "No call-site ranges!");
+
+ // There should be only one call-site range which includes all the landing
+ // pads. Find that call-site range here.
+ const CallSiteRange *LandingPadRange = nullptr;
+ for (const CallSiteRange &CSRange : CallSiteRanges) {
+ if (CSRange.IsLPRange) {
+ assert(LandingPadRange == nullptr &&
+ "All landing pads must be in a single callsite range.");
+ LandingPadRange = &CSRange;
+ }
+ }
+
+ // The call-site table is split into its call-site ranges, each being
+ // emitted as:
+ // [ LPStartEncoding | LPStart ]
+ // [ TypeTableEncoding | TypeTableOffset ]
+ // [ CallSiteEncoding | CallSiteTableEndOffset ]
+ // cst_begin -> { call-site entries contained in this range }
+ //
+ // and is followed by the next call-site range.
+ //
+ // For each call-site range, CallSiteTableEndOffset is computed as the
+ // difference between cst_begin of that range and the last call-site-table's
+ // end label. This offset is used to find the action table.
+
unsigned Entry = 0;
- for (SmallVectorImpl<CallSiteEntry>::const_iterator
- I = CallSites.begin(), E = CallSites.end(); I != E; ++I) {
- const CallSiteEntry &S = *I;
+ for (const CallSiteRange &CSRange : CallSiteRanges) {
+ if (CSRange.CallSiteBeginIdx != 0) {
+ // Align the call-site range for all ranges except the first. The
+ // first range is already aligned due to the exception table alignment.
+ Asm->emitAlignment(Align(4));
+ }
+ Asm->OutStreamer->emitLabel(CSRange.ExceptionLabel);
+
+ // Emit the LSDA header.
+ // If only one call-site range exists, LPStart is omitted as it is the
+ // same as the function entry.
+ if (CallSiteRanges.size() == 1) {
+ Asm->emitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");
+ } else if (!Asm->isPositionIndependent()) {
+        // When there is more than one call-site range, LPStart must be
+        // explicitly specified.
+ // For non-PIC we can simply use the absolute value.
+ Asm->emitEncodingByte(dwarf::DW_EH_PE_absptr, "@LPStart");
+ Asm->OutStreamer->emitSymbolValue(LandingPadRange->FragmentBeginLabel,
+ Asm->MAI->getCodePointerSize());
+ } else {
+        // In PIC mode, emit a PC-relative address for LPStart.
+ Asm->emitEncodingByte(dwarf::DW_EH_PE_pcrel, "@LPStart");
+ MCContext &Context = Asm->OutStreamer->getContext();
+ MCSymbol *Dot = Context.createTempSymbol();
+ Asm->OutStreamer->emitLabel(Dot);
+ Asm->OutStreamer->emitValue(
+ MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(LandingPadRange->FragmentBeginLabel,
+ Context),
+ MCSymbolRefExpr::create(Dot, Context), Context),
+ Asm->MAI->getCodePointerSize());
+ }
+
+ if (HasLEB128Directives)
+ EmitTypeTableRefAndCallSiteTableEndRef();
+ else
+ EmitTypeTableOffsetAndCallSiteTableOffset();
+
+ for (size_t CallSiteIdx = CSRange.CallSiteBeginIdx;
+ CallSiteIdx != CSRange.CallSiteEndIdx; ++CallSiteIdx) {
+ const CallSiteEntry &S = CallSites[CallSiteIdx];
+
+ MCSymbol *EHFuncBeginSym = CSRange.FragmentBeginLabel;
+ MCSymbol *EHFuncEndSym = CSRange.FragmentEndLabel;
- MCSymbol *EHFuncBeginSym = Asm->getFunctionBegin();
-
- MCSymbol *BeginLabel = S.BeginLabel;
- if (!BeginLabel)
- BeginLabel = EHFuncBeginSym;
- MCSymbol *EndLabel = S.EndLabel;
- if (!EndLabel)
- EndLabel = Asm->getFunctionEnd();
-
- // Offset of the call site relative to the start of the procedure.
- if (VerboseAsm)
- Asm->OutStreamer->AddComment(">> Call Site " + Twine(++Entry) + " <<");
- Asm->emitCallSiteOffset(BeginLabel, EHFuncBeginSym, CallSiteEncoding);
- if (VerboseAsm)
- Asm->OutStreamer->AddComment(Twine(" Call between ") +
- BeginLabel->getName() + " and " +
- EndLabel->getName());
- Asm->emitCallSiteOffset(EndLabel, BeginLabel, CallSiteEncoding);
-
- // Offset of the landing pad relative to the start of the procedure.
- if (!S.LPad) {
+ MCSymbol *BeginLabel = S.BeginLabel;
+ if (!BeginLabel)
+ BeginLabel = EHFuncBeginSym;
+ MCSymbol *EndLabel = S.EndLabel;
+ if (!EndLabel)
+ EndLabel = EHFuncEndSym;
+
+ // Offset of the call site relative to the start of the procedure.
if (VerboseAsm)
- Asm->OutStreamer->AddComment(" has no landing pad");
- Asm->emitCallSiteValue(0, CallSiteEncoding);
- } else {
+ Asm->OutStreamer->AddComment(">> Call Site " + Twine(++Entry) +
+ " <<");
+ Asm->emitCallSiteOffset(BeginLabel, EHFuncBeginSym, CallSiteEncoding);
if (VerboseAsm)
- Asm->OutStreamer->AddComment(Twine(" jumps to ") +
- S.LPad->LandingPadLabel->getName());
- Asm->emitCallSiteOffset(S.LPad->LandingPadLabel, EHFuncBeginSym,
- CallSiteEncoding);
- }
+ Asm->OutStreamer->AddComment(Twine(" Call between ") +
+ BeginLabel->getName() + " and " +
+ EndLabel->getName());
+ Asm->emitCallSiteOffset(EndLabel, BeginLabel, CallSiteEncoding);
+
+ // Offset of the landing pad relative to the start of the landing pad
+ // fragment.
+ if (!S.LPad) {
+ if (VerboseAsm)
+ Asm->OutStreamer->AddComment(" has no landing pad");
+ Asm->emitCallSiteValue(0, CallSiteEncoding);
+ } else {
+ if (VerboseAsm)
+ Asm->OutStreamer->AddComment(Twine(" jumps to ") +
+ S.LPad->LandingPadLabel->getName());
+ Asm->emitCallSiteOffset(S.LPad->LandingPadLabel,
+ LandingPadRange->FragmentBeginLabel,
+ CallSiteEncoding);
+ }
- // Offset of the first associated action record, relative to the start of
- // the action table. This value is biased by 1 (1 indicates the start of
- // the action table), and 0 indicates that there are no actions.
- if (VerboseAsm) {
- if (S.Action == 0)
- Asm->OutStreamer->AddComment(" On action: cleanup");
- else
- Asm->OutStreamer->AddComment(" On action: " +
- Twine((S.Action - 1) / 2 + 1));
+ // Offset of the first associated action record, relative to the start
+ // of the action table. This value is biased by 1 (1 indicates the start
+ // of the action table), and 0 indicates that there are no actions.
+ if (VerboseAsm) {
+ if (S.Action == 0)
+ Asm->OutStreamer->AddComment(" On action: cleanup");
+ else
+ Asm->OutStreamer->AddComment(" On action: " +
+ Twine((S.Action - 1) / 2 + 1));
+ }
+ Asm->emitULEB128(S.Action);
}
- Asm->emitULEB128(S.Action);
}
+ Asm->OutStreamer->emitLabel(CstEndLabel);
}
- Asm->OutStreamer->emitLabel(CstEndLabel);
// Emit the Action Table.
int Entry = 0;
@@ -587,15 +783,12 @@ MCSymbol *EHStreamer::emitExceptionTable() {
Asm->emitSLEB128(Action.ValueForTypeID);
// Action Record
- //
- // Self-relative signed displacement in bytes of the next action record,
- // or 0 if there is no next action record.
if (VerboseAsm) {
- if (Action.NextAction == 0) {
+ if (Action.Previous == unsigned(-1)) {
Asm->OutStreamer->AddComment(" No further actions");
} else {
- unsigned NextAction = Entry + (Action.NextAction + 1) / 2;
- Asm->OutStreamer->AddComment(" Continue to action "+Twine(NextAction));
+ Asm->OutStreamer->AddComment(" Continue to action " +
+ Twine(Action.Previous + 1));
}
}
Asm->emitSLEB128(Action.NextAction);
@@ -615,7 +808,7 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) {
const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos();
const std::vector<unsigned> &FilterIds = MF->getFilterIds();
- bool VerboseAsm = Asm->OutStreamer->isVerboseAsm();
+ const bool VerboseAsm = Asm->OutStreamer->isVerboseAsm();
int Entry = 0;
// Emit the Catch TypeInfos.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
index e62cf17a05d4..234e62506a56 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -69,23 +69,48 @@ protected:
unsigned Action;
};
+ /// Structure describing a contiguous range of call-sites which reside
+ /// in the same procedure fragment. With -fbasic-block-sections, there will
+ /// be one call site range per basic block section. Otherwise, we will have
+ /// one call site range containing all the call sites in the function.
+ struct CallSiteRange {
+    // Symbol marking the beginning of the procedure fragment.
+ MCSymbol *FragmentBeginLabel = nullptr;
+ // Symbol marking the end of the procedure fragment.
+ MCSymbol *FragmentEndLabel = nullptr;
+ // LSDA symbol for this call-site range.
+ MCSymbol *ExceptionLabel = nullptr;
+ // Index of the first call-site entry in the call-site table which
+ // belongs to this range.
+ size_t CallSiteBeginIdx = 0;
+ // Index just after the last call-site entry in the call-site table which
+ // belongs to this range.
+ size_t CallSiteEndIdx = 0;
+ // Whether this is the call-site range containing all the landing pads.
+ bool IsLPRange = false;
+ };
+
/// Compute the actions table and gather the first action index for each
/// landing pad site.
- void computeActionsTable(const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
- SmallVectorImpl<ActionEntry> &Actions,
- SmallVectorImpl<unsigned> &FirstActions);
+ void computeActionsTable(
+ const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+ SmallVectorImpl<ActionEntry> &Actions,
+ SmallVectorImpl<unsigned> &FirstActions);
void computePadMap(const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
RangeMapType &PadMap);
- /// Compute the call-site table. The entry for an invoke has a try-range
- /// containing the call, a non-zero landing pad and an appropriate action.
- /// The entry for an ordinary call has a try-range containing the call and
- /// zero for the landing pad and the action. Calls marked 'nounwind' have
- /// no entry and must not be contained in the try-range of any entry - they
- /// form gaps in the table. Entries must be ordered by try-range address.
+ /// Compute the call-site table and the call-site ranges. The entry for an
+ /// invoke has a try-range containing the call, a non-zero landing pad and an
+ /// appropriate action. The entry for an ordinary call has a try-range
+ /// containing the call and zero for the landing pad and the action. Calls
+ /// marked 'nounwind' have no entry and must not be contained in the try-range
+ /// of any entry - they form gaps in the table. Entries must be ordered by
+  /// try-range address. The CallSiteRanges vector is populated only for
+  /// Itanium exception handling.
virtual void computeCallSiteTable(
SmallVectorImpl<CallSiteEntry> &CallSites,
+ SmallVectorImpl<CallSiteRange> &CallSiteRanges,
const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
const SmallVectorImpl<unsigned> &FirstActions);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 8fa83f515910..354b638b47a2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -145,9 +145,10 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
report_fatal_error("Function '" + FI.getFunction().getName() +
"' is too large for the ocaml GC! "
"Frame size " +
- Twine(FrameSize) + ">= 65536.\n"
- "(" +
- Twine(uintptr_t(&FI)) + ")");
+ Twine(FrameSize) +
+ ">= 65536.\n"
+ "(" +
+ Twine(reinterpret_cast<uintptr_t>(&FI)) + ")");
}
AP.OutStreamer->AddComment("live roots for " +
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
new file mode 100644
index 000000000000..e8636052c54c
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
@@ -0,0 +1,84 @@
+//===- llvm/CodeGen/PseudoProbePrinter.cpp - Pseudo Probe Emission -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing pseudo probe info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PseudoProbePrinter.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PseudoProbe.h"
+#include "llvm/MC/MCPseudoProbe.h"
+#include "llvm/MC/MCStreamer.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pseudoprobe"
+
+PseudoProbeHandler::~PseudoProbeHandler() = default;
+
+PseudoProbeHandler::PseudoProbeHandler(AsmPrinter *A, Module *M) : Asm(A) {
+ NamedMDNode *FuncInfo = M->getNamedMetadata(PseudoProbeDescMetadataName);
+ assert(FuncInfo && "Pseudo probe descriptors are missing");
+ for (const auto *Operand : FuncInfo->operands()) {
+ const auto *MD = cast<MDNode>(Operand);
+ auto GUID =
+ mdconst::dyn_extract<ConstantInt>(MD->getOperand(0))->getZExtValue();
+ auto Name = cast<MDString>(MD->getOperand(2))->getString();
+    // We may see pairs with the same name but different GUIDs here in LTO mode, due
+ // to static same-named functions inlined from other modules into this
+ // module. Function profiles with the same name will be merged no matter
+ // whether they are collected on the same function. Therefore we just pick
+ // up the last <Name, GUID> pair here to represent the same-named function
+ // collection and all probes from the collection will be merged into a
+ // single profile eventually.
+ Names[Name] = GUID;
+ }
+
+ LLVM_DEBUG(dump());
+}
+
+void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index,
+ uint64_t Type, uint64_t Attr,
+ const DILocation *DebugLoc) {
+ // Gather all the inlined-at nodes.
+  // When it's done, ReversedInlineStack looks like ([66, B], [88, A]),
+  // which means function A inlines function B at a call site with probe
+  // id 88, and B inlines C at probe 66, where C is represented by Guid.
+ SmallVector<InlineSite, 8> ReversedInlineStack;
+ auto *InlinedAt = DebugLoc ? DebugLoc->getInlinedAt() : nullptr;
+ while (InlinedAt) {
+ const DISubprogram *SP = InlinedAt->getScope()->getSubprogram();
+ // Use linkage name for C++ if possible.
+ auto Name = SP->getLinkageName();
+ if (Name.empty())
+ Name = SP->getName();
+ assert(Names.count(Name) && "Pseudo probe descriptor missing for function");
+ uint64_t CallerGuid = Names[Name];
+ uint64_t CallerProbeId = PseudoProbeDwarfDiscriminator::extractProbeIndex(
+ InlinedAt->getDiscriminator());
+ ReversedInlineStack.emplace_back(CallerGuid, CallerProbeId);
+ InlinedAt = InlinedAt->getInlinedAt();
+ }
+
+ SmallVector<InlineSite, 8> InlineStack(ReversedInlineStack.rbegin(),
+ ReversedInlineStack.rend());
+ Asm->OutStreamer->emitPseudoProbe(Guid, Index, Type, Attr, InlineStack);
+}
+
+#ifndef NDEBUG
+void PseudoProbeHandler::dump() const {
+ dbgs() << "\n=============================\n";
+ dbgs() << "\nFunction Name to GUID map:\n";
+ dbgs() << "\n=============================\n";
+ for (const auto &Item : Names)
+ dbgs() << "Func: " << Item.first << " GUID: " << Item.second << "\n";
+}
+#endif
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h
new file mode 100644
index 000000000000..bea07ceae9d4
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h
@@ -0,0 +1,53 @@
+//===- PseudoProbePrinter.h - Pseudo probe encoding support -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing pseudo probe info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_PSEUDOPROBEPRINTER_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_PSEUDOPROBEPRINTER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/AsmPrinterHandler.h"
+
+namespace llvm {
+
+class AsmPrinter;
+class MCStreamer;
+class Module;
+class DILocation;
+
+class PseudoProbeHandler : public AsmPrinterHandler {
+ // Target of pseudo probe emission.
+ AsmPrinter *Asm;
+ // Name to GUID map
+ DenseMap<StringRef, uint64_t> Names;
+
+public:
+ PseudoProbeHandler(AsmPrinter *A, Module *M);
+ ~PseudoProbeHandler() override;
+
+ void emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type,
+ uint64_t Attr, const DILocation *DebugLoc);
+
+ // Unused.
+ void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}
+ void endModule() override {}
+ void beginFunction(const MachineFunction *MF) override {}
+ void endFunction(const MachineFunction *MF) override {}
+ void beginInstruction(const MachineInstr *MI) override {}
+ void endInstruction() override {}
+
+#ifndef NDEBUG
+ void dump() const;
+#endif
+};
+
+} // namespace llvm
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_PSEUDOPROBEPRINTER_H
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
index baef4d2cc849..352a33e8639d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
@@ -18,11 +18,11 @@
using namespace llvm;
void WasmException::endModule() {
- // This is the symbol used in 'throw' and 'br_on_exn' instruction to denote
- // this is a C++ exception. This symbol has to be emitted somewhere once in
- // the module. Check if the symbol has already been created, i.e., we have at
- // least one 'throw' or 'br_on_exn' instruction in the module, and emit the
- // symbol only if so.
+  // This is the symbol used in 'throw' and 'catch' instructions to denote this
+ // is a C++ exception. This symbol has to be emitted somewhere once in the
+ // module. Check if the symbol has already been created, i.e., we have at
+ // least one 'throw' or 'catch' instruction in the module, and emit the symbol
+ // only if so.
SmallString<60> NameStr;
Mangler::getNameWithPrefix(NameStr, "__cpp_exception", Asm->getDataLayout());
if (Asm->OutContext.lookupSymbol(NameStr)) {
@@ -76,6 +76,7 @@ void WasmException::endFunction(const MachineFunction *MF) {
// information.
void WasmException::computeCallSiteTable(
SmallVectorImpl<CallSiteEntry> &CallSites,
+ SmallVectorImpl<CallSiteRange> &CallSiteRanges,
const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
const SmallVectorImpl<unsigned> &FirstActions) {
MachineFunction &MF = *Asm->MF;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.h
index 1893b6b2df43..f06de786bd76 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.h
@@ -32,6 +32,7 @@ protected:
// Compute the call site table for wasm EH.
void computeCallSiteTable(
SmallVectorImpl<CallSiteEntry> &CallSites,
+ SmallVectorImpl<CallSiteRange> &CallSiteRanges,
const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
const SmallVectorImpl<unsigned> &FirstActions) override;
};
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
index 914308d9147e..1e3f33e70715 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
//
// This file contains support for writing the metadata for Windows Control Flow
-// Guard, including address-taken functions, and valid longjmp targets.
+// Guard, including address-taken functions and valid longjmp targets.
//
//===----------------------------------------------------------------------===//
@@ -17,8 +17,8 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCStreamer.h"
@@ -38,8 +38,7 @@ void WinCFGuard::endFunction(const MachineFunction *MF) {
return;
// Copy the function's longjmp targets to a module-level list.
- LongjmpTargets.insert(LongjmpTargets.end(), MF->getLongjmpTargets().begin(),
- MF->getLongjmpTargets().end());
+ llvm::append_range(LongjmpTargets, MF->getLongjmpTargets());
}
/// Returns true if this function's address is escaped in a way that might make
@@ -78,20 +77,50 @@ static bool isPossibleIndirectCallTarget(const Function *F) {
return false;
}
+MCSymbol *WinCFGuard::lookupImpSymbol(const MCSymbol *Sym) {
+ if (Sym->getName().startswith("__imp_"))
+ return nullptr;
+ return Asm->OutContext.lookupSymbol(Twine("__imp_") + Sym->getName());
+}
+
void WinCFGuard::endModule() {
const Module *M = Asm->MMI->getModule();
- std::vector<const Function *> Functions;
- for (const Function &F : *M)
- if (isPossibleIndirectCallTarget(&F))
- Functions.push_back(&F);
- if (Functions.empty() && LongjmpTargets.empty())
+ std::vector<const MCSymbol *> GFIDsEntries;
+ std::vector<const MCSymbol *> GIATsEntries;
+ for (const Function &F : *M) {
+ if (isPossibleIndirectCallTarget(&F)) {
+ // If F is a dllimport and has an "__imp_" symbol already defined, add the
+ // "__imp_" symbol to the .giats section.
+ if (F.hasDLLImportStorageClass()) {
+ if (MCSymbol *impSym = lookupImpSymbol(Asm->getSymbol(&F))) {
+ GIATsEntries.push_back(impSym);
+ }
+ }
+ // Add the function's symbol to the .gfids section.
+ // Note: For dllimport functions, MSVC sometimes does not add this symbol
+ // to the .gfids section, but only adds the corresponding "__imp_" symbol
+ // to the .giats section. Here we always add the symbol to the .gfids
+ // section, since this does not introduce security risks.
+ GFIDsEntries.push_back(Asm->getSymbol(&F));
+ }
+ }
+
+ if (GFIDsEntries.empty() && GIATsEntries.empty() && LongjmpTargets.empty())
return;
+
+ // Emit the symbol index of each GFIDs entry to form the .gfids section.
auto &OS = *Asm->OutStreamer;
OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGFIDsSection());
- for (const Function *F : Functions)
- OS.EmitCOFFSymbolIndex(Asm->getSymbol(F));
+ for (const MCSymbol *S : GFIDsEntries)
+ OS.EmitCOFFSymbolIndex(S);
+
+ // Emit the symbol index of each GIATs entry to form the .giats section.
+ OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGIATsSection());
+ for (const MCSymbol *S : GIATsEntries) {
+ OS.EmitCOFFSymbolIndex(S);
+ }
- // Emit the symbol index of each longjmp target.
+ // Emit the symbol index of each longjmp target to form the .gljmp section.
OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGLJMPSection());
for (const MCSymbol *S : LongjmpTargets) {
OS.EmitCOFFSymbolIndex(S);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h
index 494a153b05ba..0e472af52c8f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h
@@ -24,6 +24,7 @@ class LLVM_LIBRARY_VISIBILITY WinCFGuard : public AsmPrinterHandler {
/// Target of directive emission.
AsmPrinter *Asm;
std::vector<const MCSymbol *> LongjmpTargets;
+ MCSymbol *lookupImpSymbol(const MCSymbol *Sym);
public:
WinCFGuard(AsmPrinter *A);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
index cd8077e7d548..3a9c9df79783 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -137,8 +137,8 @@ void WinException::endFunction(const MachineFunction *MF) {
endFuncletImpl();
- // endFunclet will emit the necessary .xdata tables for x64 SEH.
- if (Per == EHPersonality::MSVC_Win64SEH && MF->hasEHFunclets())
+ // endFunclet will emit the necessary .xdata tables for table-based SEH.
+ if (Per == EHPersonality::MSVC_TableSEH && MF->hasEHFunclets())
return;
if (shouldEmitPersonality || shouldEmitLSDA) {
@@ -151,7 +151,7 @@ void WinException::endFunction(const MachineFunction *MF) {
// Emit the tables appropriate to the personality function in use. If we
// don't recognize the personality, assume it uses an Itanium-style LSDA.
- if (Per == EHPersonality::MSVC_Win64SEH)
+ if (Per == EHPersonality::MSVC_TableSEH)
emitCSpecificHandlerTable(MF);
else if (Per == EHPersonality::MSVC_X86SEH)
emitExceptHandlerTable(MF);
@@ -258,31 +258,35 @@ void WinException::endFuncletImpl() {
if (F.hasPersonalityFn())
Per = classifyEHPersonality(F.getPersonalityFn()->stripPointerCasts());
- // On funclet exit, we emit a fake "function" end marker, so that the call
- // to EmitWinEHHandlerData below can calculate the size of the funclet or
- // function.
- if (isAArch64) {
- MCSection *XData = Asm->OutStreamer->getAssociatedXDataSection(
- Asm->OutStreamer->getCurrentSectionOnly());
- Asm->OutStreamer->SwitchSection(XData);
- }
-
- // Emit an UNWIND_INFO struct describing the prologue.
- Asm->OutStreamer->EmitWinEHHandlerData();
-
if (Per == EHPersonality::MSVC_CXX && shouldEmitPersonality &&
!CurrentFuncletEntry->isCleanupFuncletEntry()) {
+ // Emit an UNWIND_INFO struct describing the prologue.
+ Asm->OutStreamer->EmitWinEHHandlerData();
+
// If this is a C++ catch funclet (or the parent function),
// emit a reference to the LSDA for the parent function.
StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F.getName());
MCSymbol *FuncInfoXData = Asm->OutContext.getOrCreateSymbol(
Twine("$cppxdata$", FuncLinkageName));
Asm->OutStreamer->emitValue(create32bitRef(FuncInfoXData), 4);
- } else if (Per == EHPersonality::MSVC_Win64SEH && MF->hasEHFunclets() &&
+ } else if (Per == EHPersonality::MSVC_TableSEH && MF->hasEHFunclets() &&
!CurrentFuncletEntry->isEHFuncletEntry()) {
+ // Emit an UNWIND_INFO struct describing the prologue.
+ Asm->OutStreamer->EmitWinEHHandlerData();
+
// If this is the parent function in Win64 SEH, emit the LSDA immediately
// following .seh_handlerdata.
emitCSpecificHandlerTable(MF);
+ } else if (shouldEmitPersonality || shouldEmitLSDA) {
+ // Emit an UNWIND_INFO struct describing the prologue.
+ Asm->OutStreamer->EmitWinEHHandlerData();
+ // In these cases, no further info is written to the .xdata section
+ // right here, but is written by e.g. emitExceptionTable in endFunction()
+ // above.
+ } else {
+ // No need to emit the EH handler data right here if nothing needs
+ // writing to the .xdata section; it will be emitted for all
+ // functions that need it in the end anyway.
}
// Switch back to the funclet start .text section now that we are done
@@ -339,22 +343,24 @@ int WinException::getFrameIndexOffset(int FrameIndex,
const TargetFrameLowering &TFI = *Asm->MF->getSubtarget().getFrameLowering();
Register UnusedReg;
if (Asm->MAI->usesWindowsCFI()) {
- int Offset =
+ StackOffset Offset =
TFI.getFrameIndexReferencePreferSP(*Asm->MF, FrameIndex, UnusedReg,
/*IgnoreSPUpdates*/ true);
assert(UnusedReg ==
Asm->MF->getSubtarget()
.getTargetLowering()
->getStackPointerRegisterToSaveRestore());
- return Offset;
+ return Offset.getFixed();
}
// For 32-bit, offsets should be relative to the end of the EH registration
// node. For 64-bit, it's relative to SP at the end of the prologue.
assert(FuncInfo.EHRegNodeEndOffset != INT_MAX);
- int Offset = TFI.getFrameIndexReference(*Asm->MF, FrameIndex, UnusedReg);
- Offset += FuncInfo.EHRegNodeEndOffset;
- return Offset;
+ StackOffset Offset = TFI.getFrameIndexReference(*Asm->MF, FrameIndex, UnusedReg);
+ Offset += StackOffset::getFixed(FuncInfo.EHRegNodeEndOffset);
+ assert(!Offset.getScalable() &&
+ "Frame offsets with a scalable component are not supported");
+ return Offset.getFixed();
}
namespace {
@@ -951,7 +957,7 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo,
int FI = FuncInfo.EHRegNodeFrameIndex;
if (FI != INT_MAX) {
const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
- Offset = TFI->getNonLocalFrameIndexReference(*Asm->MF, FI);
+ Offset = TFI->getNonLocalFrameIndexReference(*Asm->MF, FI).getFixed();
}
MCContext &Ctx = Asm->OutContext;
@@ -1015,7 +1021,8 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
Register UnusedReg;
const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
int SSPIdx = MFI.getStackProtectorIndex();
- GSCookieOffset = TFI->getFrameIndexReference(*MF, SSPIdx, UnusedReg);
+ GSCookieOffset =
+ TFI->getFrameIndexReference(*MF, SSPIdx, UnusedReg).getFixed();
}
// Retrieve the EH Guard slot.
@@ -1025,7 +1032,8 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
Register UnusedReg;
const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
int EHGuardIdx = FuncInfo.EHGuardFrameIndex;
- EHCookieOffset = TFI->getFrameIndexReference(*MF, EHGuardIdx, UnusedReg);
+ EHCookieOffset =
+ TFI->getFrameIndexReference(*MF, EHGuardIdx, UnusedReg).getFixed();
}
AddComment("GSCookieOffset");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp
index c61531c5141a..4026022caa07 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1507,8 +1507,8 @@ void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
bool expanded = expandAtomicOpToLibcall(
I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
- (void)expanded;
- assert(expanded && "expandAtomicOpToLibcall shouldn't fail tor Load");
+ if (!expanded)
+ report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load");
}
void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
@@ -1520,8 +1520,8 @@ void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
bool expanded = expandAtomicOpToLibcall(
I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
- (void)expanded;
- assert(expanded && "expandAtomicOpToLibcall shouldn't fail tor Store");
+ if (!expanded)
+ report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store");
}
void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
@@ -1535,8 +1535,8 @@ void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
Libcalls);
- (void)expanded;
- assert(expanded && "expandAtomicOpToLibcall shouldn't fail tor CAS");
+ if (!expanded)
+ report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS");
}
static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
@@ -1685,6 +1685,11 @@ bool AtomicExpand::expandAtomicOpToLibcall(
return false;
}
+ if (!TLI->getLibcallName(RTLibType)) {
+ // This target does not implement the requested atomic libcall so give up.
+ return false;
+ }
+
// Build up the function call. There's two kinds. First, the sized
// variants. These calls are going to be one of the following (with
// N=1,2,4,8,16):
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BBSectionsPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp
index a35c4d813acc..7499ea8b42d4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BBSectionsPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -1,4 +1,4 @@
-//===-- BBSectionsPrepare.cpp ---=========---------------------------------===//
+//===-- BasicBlockSections.cpp ---=========--------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// BBSectionsPrepare implementation.
+// BasicBlockSections implementation.
//
// The purpose of this pass is to assign sections to basic blocks when
// -fbasic-block-sections= option is used. Further, with profile information
@@ -48,19 +48,11 @@
// Basic Block Labels
// ==================
//
-// With -fbasic-block-sections=labels, or when a basic block is placed in a
-// unique section, it is labelled with a symbol. This allows easy mapping of
-// virtual addresses from PMU profiles back to the corresponding basic blocks.
-// Since the number of basic blocks is large, the labeling bloats the symbol
-// table sizes and the string table sizes significantly. While the binary size
-// does increase, it does not affect performance as the symbol table is not
-// loaded in memory during run-time. The string table size bloat is kept very
-// minimal using a unary naming scheme that uses string suffix compression. The
-// basic blocks for function foo are named "a.BB.foo", "aa.BB.foo", ... This
-// turns out to be very good for string table sizes and the bloat in the string
-// table size for a very large binary is ~8 %. The naming also allows using
-// the --symbol-ordering-file option in LLD to arbitrarily reorder the
-// sections.
+// With -fbasic-block-sections=labels, we emit the offsets of BB addresses of
+// every function into the .llvm_bb_addr_map section. Along with the function
+// symbols, this allows for mapping of virtual addresses in PMU profiles back to
+// the corresponding basic blocks. This logic is implemented in AsmPrinter. This
+// pass only assigns the BBSectionType of every function to ``labels``.
//
//===----------------------------------------------------------------------===//
@@ -69,6 +61,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/BasicBlockSectionUtils.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -86,6 +79,15 @@ using llvm::StringMap;
using llvm::StringRef;
using namespace llvm;
+// Placing the cold clusters in a separate section mitigates the impact of
+// poor profiles and allows optimizations such as hugepage mapping to be
+// applied at a section granularity. Defaults to ".text.split.", which is
+// recognized by lld via the `-z keep-text-section-prefix` flag.
+cl::opt<std::string> llvm::BBSectionsColdTextPrefix(
+ "bbsections-cold-text-prefix",
+ cl::desc("The text prefix to use for cold basic block clusters"),
+ cl::init(".text.split."), cl::Hidden);
+
namespace {
// This struct represents the cluster information for a machine basic block.
@@ -100,7 +102,7 @@ struct BBClusterInfo {
using ProgramBBClusterInfoMapTy = StringMap<SmallVector<BBClusterInfo, 4>>;
-class BBSectionsPrepare : public MachineFunctionPass {
+class BasicBlockSections : public MachineFunctionPass {
public:
static char ID;
@@ -119,13 +121,13 @@ public:
// name for which we have mapping in ProgramBBClusterInfo.
StringMap<StringRef> FuncAliasMap;
- BBSectionsPrepare(const MemoryBuffer *Buf)
+ BasicBlockSections(const MemoryBuffer *Buf)
: MachineFunctionPass(ID), MBuf(Buf) {
- initializeBBSectionsPreparePass(*PassRegistry::getPassRegistry());
+ initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry());
};
- BBSectionsPrepare() : MachineFunctionPass(ID) {
- initializeBBSectionsPreparePass(*PassRegistry::getPassRegistry());
+ BasicBlockSections() : MachineFunctionPass(ID) {
+ initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry());
}
StringRef getPassName() const override {
@@ -144,8 +146,8 @@ public:
} // end anonymous namespace
-char BBSectionsPrepare::ID = 0;
-INITIALIZE_PASS(BBSectionsPrepare, "bbsections-prepare",
+char BasicBlockSections::ID = 0;
+INITIALIZE_PASS(BasicBlockSections, "bbsections-prepare",
"Prepares for basic block sections, by splitting functions "
"into clusters of basic blocks.",
false, false)
@@ -226,9 +228,9 @@ static bool getBBClusterInfoForFunction(
// and "Cold" succeeding all other clusters.
// FuncBBClusterInfo represent the cluster information for basic blocks. If this
// is empty, it means unique sections for all basic blocks in the function.
-static bool assignSectionsAndSortBasicBlocks(
- MachineFunction &MF,
- const std::vector<Optional<BBClusterInfo>> &FuncBBClusterInfo) {
+static void
+assignSections(MachineFunction &MF,
+ const std::vector<Optional<BBClusterInfo>> &FuncBBClusterInfo) {
assert(MF.hasBBSections() && "BB Sections is not set for function.");
// This variable stores the section ID of the cluster containing eh_pads (if
// all eh_pads are one cluster). If more than one cluster contain eh_pads, we
@@ -271,12 +273,69 @@ static bool assignSectionsAndSortBasicBlocks(
for (auto &MBB : MF)
if (MBB.isEHPad())
MBB.setSectionID(EHPadsSectionID.getValue());
+}
+void llvm::sortBasicBlocksAndUpdateBranches(
+ MachineFunction &MF, MachineBasicBlockComparator MBBCmp) {
SmallVector<MachineBasicBlock *, 4> PreLayoutFallThroughs(
MF.getNumBlockIDs());
for (auto &MBB : MF)
PreLayoutFallThroughs[MBB.getNumber()] = MBB.getFallThrough();
+ MF.sort(MBBCmp);
+
+ // Set IsBeginSection and IsEndSection according to the assigned section IDs.
+ MF.assignBeginEndSections();
+
+ // After reordering basic blocks, we must update basic block branches to
+ // insert explicit fallthrough branches when required and optimize branches
+ // when possible.
+ updateBranches(MF, PreLayoutFallThroughs);
+}
+
+// If the exception section begins with a landing pad, that landing pad will
+// assume a zero offset (relative to @LPStart) in the LSDA. However, a value of
+// zero implies "no landing pad." This function inserts a NOP just before the EH
+// pad label to ensure a nonzero offset. Returns true if padding is not needed.
+static bool avoidZeroOffsetLandingPad(MachineFunction &MF) {
+ for (auto &MBB : MF) {
+ if (MBB.isBeginSection() && MBB.isEHPad()) {
+ MachineBasicBlock::iterator MI = MBB.begin();
+ while (!MI->isEHLabel())
+ ++MI;
+ MCInst Noop;
+ MF.getSubtarget().getInstrInfo()->getNoop(Noop);
+ BuildMI(MBB, MI, DebugLoc(),
+ MF.getSubtarget().getInstrInfo()->get(Noop.getOpcode()));
+ return false;
+ }
+ }
+ return true;
+}
+
+bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
+ auto BBSectionsType = MF.getTarget().getBBSectionsType();
+ assert(BBSectionsType != BasicBlockSection::None &&
+ "BB Sections not enabled!");
+ // Renumber blocks before sorting them for basic block sections. This is
+  // useful during sorting: basic blocks in the same section will retain the
+ // default order. This renumbering should also be done for basic block
+ // labels to match the profiles with the correct blocks.
+ MF.RenumberBlocks();
+
+ if (BBSectionsType == BasicBlockSection::Labels) {
+ MF.setBBSectionsType(BBSectionsType);
+ return true;
+ }
+
+ std::vector<Optional<BBClusterInfo>> FuncBBClusterInfo;
+ if (BBSectionsType == BasicBlockSection::List &&
+ !getBBClusterInfoForFunction(MF, FuncAliasMap, ProgramBBClusterInfo,
+ FuncBBClusterInfo))
+ return true;
+ MF.setBBSectionsType(BBSectionsType);
+ assignSections(MF, FuncBBClusterInfo);
+
// We make sure that the cluster including the entry basic block precedes all
// other clusters.
auto EntryBBSectionID = MF.front().getSectionID();
@@ -300,7 +359,8 @@ static bool assignSectionsAndSortBasicBlocks(
// contiguous and ordered accordingly. Furthermore, clusters are ordered in
// increasing order of their section IDs, with the exception and the
// cold section placed at the end of the function.
- MF.sort([&](MachineBasicBlock &X, MachineBasicBlock &Y) {
+ auto Comparator = [&](const MachineBasicBlock &X,
+ const MachineBasicBlock &Y) {
auto XSectionID = X.getSectionID();
auto YSectionID = Y.getSectionID();
if (XSectionID != YSectionID)
@@ -311,43 +371,10 @@ static bool assignSectionsAndSortBasicBlocks(
return FuncBBClusterInfo[X.getNumber()]->PositionInCluster <
FuncBBClusterInfo[Y.getNumber()]->PositionInCluster;
return X.getNumber() < Y.getNumber();
- });
-
- // Set IsBeginSection and IsEndSection according to the assigned section IDs.
- MF.assignBeginEndSections();
-
- // After reordering basic blocks, we must update basic block branches to
- // insert explicit fallthrough branches when required and optimize branches
- // when possible.
- updateBranches(MF, PreLayoutFallThroughs);
-
- return true;
-}
-
-bool BBSectionsPrepare::runOnMachineFunction(MachineFunction &MF) {
- auto BBSectionsType = MF.getTarget().getBBSectionsType();
- assert(BBSectionsType != BasicBlockSection::None &&
- "BB Sections not enabled!");
- // Renumber blocks before sorting them for basic block sections. This is
- // useful during sorting, basic blocks in the same section will retain the
- // default order. This renumbering should also be done for basic block
- // labels to match the profiles with the correct blocks.
- MF.RenumberBlocks();
-
- if (BBSectionsType == BasicBlockSection::Labels) {
- MF.setBBSectionsType(BBSectionsType);
- MF.createBBLabels();
- return true;
- }
+ };
- std::vector<Optional<BBClusterInfo>> FuncBBClusterInfo;
- if (BBSectionsType == BasicBlockSection::List &&
- !getBBClusterInfoForFunction(MF, FuncAliasMap, ProgramBBClusterInfo,
- FuncBBClusterInfo))
- return true;
- MF.setBBSectionsType(BBSectionsType);
- MF.createBBLabels();
- assignSectionsAndSortBasicBlocks(MF, FuncBBClusterInfo);
+ sortBasicBlocksAndUpdateBranches(MF, Comparator);
+ avoidZeroOffsetLandingPad(MF);
return true;
}
@@ -438,7 +465,7 @@ static Error getBBClusterInfo(const MemoryBuffer *MBuf,
return Error::success();
}
-bool BBSectionsPrepare::doInitialization(Module &M) {
+bool BasicBlockSections::doInitialization(Module &M) {
if (!MBuf)
return false;
if (auto Err = getBBClusterInfo(MBuf, ProgramBBClusterInfo, FuncAliasMap))
@@ -446,12 +473,12 @@ bool BBSectionsPrepare::doInitialization(Module &M) {
return false;
}
-void BBSectionsPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
+void BasicBlockSections::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
MachineFunctionPass *
-llvm::createBBSectionsPreparePass(const MemoryBuffer *Buf) {
- return new BBSectionsPrepare(Buf);
+llvm::createBasicBlockSectionsPass(const MemoryBuffer *Buf) {
+ return new BasicBlockSections(Buf);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
index c6d5aa37834f..fd3f465fb390 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
@@ -18,16 +18,12 @@
#include "BranchFolding.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/Analysis.h"
-#include "llvm/CodeGen/LivePhysRegs.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -64,7 +60,6 @@
#include <cstddef>
#include <iterator>
#include <numeric>
-#include <vector>
using namespace llvm;
@@ -139,17 +134,18 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
MF.getSubtarget().getRegisterInfo());
}
-BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist,
+BranchFolder::BranchFolder(bool DefaultEnableTailMerge, bool CommonHoist,
MBFIWrapper &FreqInfo,
const MachineBranchProbabilityInfo &ProbInfo,
- ProfileSummaryInfo *PSI,
- unsigned MinTailLength)
+ ProfileSummaryInfo *PSI, unsigned MinTailLength)
: EnableHoistCommonCode(CommonHoist), MinCommonTailLength(MinTailLength),
MBBFreqInfo(FreqInfo), MBPI(ProbInfo), PSI(PSI) {
if (MinCommonTailLength == 0)
MinCommonTailLength = TailMergeSize;
switch (FlagEnableTailMerge) {
- case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
+ case cl::BOU_UNSET:
+ EnableTailMerge = DefaultEnableTailMerge;
+ break;
case cl::BOU_TRUE: EnableTailMerge = true; break;
case cl::BOU_FALSE: EnableTailMerge = false; break;
}
@@ -1407,7 +1403,7 @@ ReoptimizeBlock:
LLVM_DEBUG(dbgs() << "\nMerging into block: " << PrevBB
<< "From MBB: " << *MBB);
// Remove redundant DBG_VALUEs first.
- if (PrevBB.begin() != PrevBB.end()) {
+ if (!PrevBB.empty()) {
MachineBasicBlock::iterator PrevBBIter = PrevBB.end();
--PrevBBIter;
MachineBasicBlock::iterator MBBIter = MBB->begin();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h
index 49c6bcae2db4..2a4ea92a92aa 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h
@@ -32,8 +32,7 @@ class TargetRegisterInfo;
class LLVM_LIBRARY_VISIBILITY BranchFolder {
public:
- explicit BranchFolder(bool defaultEnableTailMerge,
- bool CommonHoist,
+ explicit BranchFolder(bool DefaultEnableTailMerge, bool CommonHoist,
MBFIWrapper &FreqInfo,
const MachineBranchProbabilityInfo &ProbInfo,
ProfileSummaryInfo *PSI,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp
index 5a3ec1a36f96..366c303614d6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -507,25 +507,31 @@ bool BranchRelaxation::relaxBranchInstructions() {
Next = std::next(J);
MachineInstr &MI = *J;
- if (MI.isConditionalBranch()) {
- MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI);
- if (!isBlockInRange(MI, *DestBB)) {
- if (Next != MBB.end() && Next->isConditionalBranch()) {
- // If there are multiple conditional branches, this isn't an
- // analyzable block. Split later terminators into a new block so
- // each one will be analyzable.
-
- splitBlockBeforeInstr(*Next, DestBB);
- } else {
- fixupConditionalBranch(MI);
- ++NumConditionalRelaxed;
- }
+ if (!MI.isConditionalBranch())
+ continue;
+
+ if (MI.getOpcode() == TargetOpcode::FAULTING_OP)
+ // FAULTING_OP's destination is not encoded in the instruction stream
+      // and thus never needs to be relaxed.
+ continue;
+
+ MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI);
+ if (!isBlockInRange(MI, *DestBB)) {
+ if (Next != MBB.end() && Next->isConditionalBranch()) {
+ // If there are multiple conditional branches, this isn't an
+ // analyzable block. Split later terminators into a new block so
+ // each one will be analyzable.
+
+ splitBlockBeforeInstr(*Next, DestBB);
+ } else {
+ fixupConditionalBranch(MI);
+ ++NumConditionalRelaxed;
+ }
- Changed = true;
+ Changed = true;
- // This may have modified all of the terminators, so start over.
- Next = MBB.getFirstTerminator();
- }
+ // This may have modified all of the terminators, so start over.
+ Next = MBB.getFirstTerminator();
}
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp
index b01a264dd97d..b11db3e65770 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp
@@ -118,7 +118,7 @@ bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
if (!MO.isRenamable())
return false;
- Register OriginalReg = MO.getReg();
+ MCRegister OriginalReg = MO.getReg().asMCReg();
// Update only undef operands that have reg units that are mapped to one root.
for (MCRegUnitIterator Unit(OriginalReg, TRI); Unit.isValid(); ++Unit) {
@@ -171,8 +171,8 @@ bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
bool BreakFalseDeps::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
unsigned Pref) {
- Register reg = MI->getOperand(OpIdx).getReg();
- unsigned Clearance = RDA->getClearance(MI, reg);
+ MCRegister Reg = MI->getOperand(OpIdx).getReg().asMCReg();
+ unsigned Clearance = RDA->getClearance(MI, Reg);
LLVM_DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);
if (Pref > Clearance) {
@@ -186,17 +186,24 @@ bool BreakFalseDeps::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
void BreakFalseDeps::processDefs(MachineInstr *MI) {
assert(!MI->isDebugInstr() && "Won't process debug values");
+ const MCInstrDesc &MCID = MI->getDesc();
+
// Break dependence on undef uses. Do this before updating LiveRegs below.
// This can remove a false dependence with no additional instructions.
- unsigned OpNum;
- unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI);
- if (Pref) {
- bool HadTrueDependency = pickBestRegisterForUndef(MI, OpNum, Pref);
- // We don't need to bother trying to break a dependency if this
- // instruction has a true dependency on that register through another
- // operand - we'll have to wait for it to be available regardless.
- if (!HadTrueDependency && shouldBreakDependence(MI, OpNum, Pref))
- UndefReads.push_back(std::make_pair(MI, OpNum));
+ for (unsigned i = MCID.getNumDefs(), e = MCID.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.getReg() || !MO.isUse() || !MO.isUndef())
+ continue;
+
+ unsigned Pref = TII->getUndefRegClearance(*MI, i, TRI);
+ if (Pref) {
+ bool HadTrueDependency = pickBestRegisterForUndef(MI, i, Pref);
+ // We don't need to bother trying to break a dependency if this
+ // instruction has a true dependency on that register through another
+ // operand - we'll have to wait for it to be available regardless.
+ if (!HadTrueDependency && shouldBreakDependence(MI, i, Pref))
+ UndefReads.push_back(std::make_pair(MI, i));
+ }
}
// The code below allows the target to create a new instruction to break the
@@ -204,7 +211,6 @@ void BreakFalseDeps::processDefs(MachineInstr *MI) {
if (MF->getFunction().hasMinSize())
return;
- const MCInstrDesc &MCID = MI->getDesc();
for (unsigned i = 0,
e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
i != e; ++i) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp
index 5d6ee09c8438..16f380c1eb62 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -28,66 +28,59 @@ using namespace llvm;
#define DEBUG_TYPE "calcspillweights"
-void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS,
- MachineFunction &MF,
- VirtRegMap *VRM,
- const MachineLoopInfo &MLI,
- const MachineBlockFrequencyInfo &MBFI,
- VirtRegAuxInfo::NormalizingFn norm) {
+void VirtRegAuxInfo::calculateSpillWeightsAndHints() {
LLVM_DEBUG(dbgs() << "********** Compute Spill Weights **********\n"
<< "********** Function: " << MF.getName() << '\n');
MachineRegisterInfo &MRI = MF.getRegInfo();
- VirtRegAuxInfo VRAI(MF, LIS, VRM, MLI, MBFI, norm);
- for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = Register::index2VirtReg(i);
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+ unsigned Reg = Register::index2VirtReg(I);
if (MRI.reg_nodbg_empty(Reg))
continue;
- VRAI.calculateSpillWeightAndHint(LIS.getInterval(Reg));
+ calculateSpillWeightAndHint(LIS.getInterval(Reg));
}
}
// Return the preferred allocation register for reg, given a COPY instruction.
-static Register copyHint(const MachineInstr *mi, unsigned reg,
- const TargetRegisterInfo &tri,
- const MachineRegisterInfo &mri) {
- unsigned sub, hsub;
- Register hreg;
- if (mi->getOperand(0).getReg() == reg) {
- sub = mi->getOperand(0).getSubReg();
- hreg = mi->getOperand(1).getReg();
- hsub = mi->getOperand(1).getSubReg();
+static Register copyHint(const MachineInstr *MI, unsigned Reg,
+ const TargetRegisterInfo &TRI,
+ const MachineRegisterInfo &MRI) {
+ unsigned Sub, HSub;
+ Register HReg;
+ if (MI->getOperand(0).getReg() == Reg) {
+ Sub = MI->getOperand(0).getSubReg();
+ HReg = MI->getOperand(1).getReg();
+ HSub = MI->getOperand(1).getSubReg();
} else {
- sub = mi->getOperand(1).getSubReg();
- hreg = mi->getOperand(0).getReg();
- hsub = mi->getOperand(0).getSubReg();
+ Sub = MI->getOperand(1).getSubReg();
+ HReg = MI->getOperand(0).getReg();
+ HSub = MI->getOperand(0).getSubReg();
}
- if (!hreg)
+ if (!HReg)
return 0;
- if (Register::isVirtualRegister(hreg))
- return sub == hsub ? hreg : Register();
+ if (Register::isVirtualRegister(HReg))
+ return Sub == HSub ? HReg : Register();
- const TargetRegisterClass *rc = mri.getRegClass(reg);
- Register CopiedPReg = (hsub ? tri.getSubReg(hreg, hsub) : hreg);
+ const TargetRegisterClass *rc = MRI.getRegClass(Reg);
+ MCRegister CopiedPReg = HSub ? TRI.getSubReg(HReg, HSub) : HReg.asMCReg();
if (rc->contains(CopiedPReg))
return CopiedPReg;
// Check if reg:sub matches so that a super register could be hinted.
- if (sub)
- return tri.getMatchingSuperReg(CopiedPReg, sub, rc);
+ if (Sub)
+ return TRI.getMatchingSuperReg(CopiedPReg, Sub, rc);
return 0;
}
// Check if all values in LI are rematerializable
-static bool isRematerializable(const LiveInterval &LI,
- const LiveIntervals &LIS,
- VirtRegMap *VRM,
+static bool isRematerializable(const LiveInterval &LI, const LiveIntervals &LIS,
+ const VirtRegMap &VRM,
const TargetInstrInfo &TII) {
- unsigned Reg = LI.reg;
- unsigned Original = VRM ? VRM->getOriginal(Reg) : 0;
+ unsigned Reg = LI.reg();
+ unsigned Original = VRM.getOriginal(Reg);
for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
I != E; ++I) {
const VNInfo *VNI = *I;
@@ -102,31 +95,28 @@ static bool isRematerializable(const LiveInterval &LI,
// Trace copies introduced by live range splitting. The inline
// spiller can rematerialize through these copies, so the spill
// weight must reflect this.
- if (VRM) {
- while (MI->isFullCopy()) {
- // The copy destination must match the interval register.
- if (MI->getOperand(0).getReg() != Reg)
- return false;
-
- // Get the source register.
- Reg = MI->getOperand(1).getReg();
-
- // If the original (pre-splitting) registers match this
- // copy came from a split.
- if (!Register::isVirtualRegister(Reg) ||
- VRM->getOriginal(Reg) != Original)
- return false;
-
- // Follow the copy live-in value.
- const LiveInterval &SrcLI = LIS.getInterval(Reg);
- LiveQueryResult SrcQ = SrcLI.Query(VNI->def);
- VNI = SrcQ.valueIn();
- assert(VNI && "Copy from non-existing value");
- if (VNI->isPHIDef())
- return false;
- MI = LIS.getInstructionFromIndex(VNI->def);
- assert(MI && "Dead valno in interval");
- }
+ while (MI->isFullCopy()) {
+ // The copy destination must match the interval register.
+ if (MI->getOperand(0).getReg() != Reg)
+ return false;
+
+ // Get the source register.
+ Reg = MI->getOperand(1).getReg();
+
+    // If the original (pre-splitting) registers match, this
+ // copy came from a split.
+ if (!Register::isVirtualRegister(Reg) || VRM.getOriginal(Reg) != Original)
+ return false;
+
+ // Follow the copy live-in value.
+ const LiveInterval &SrcLI = LIS.getInterval(Reg);
+ LiveQueryResult SrcQ = SrcLI.Query(VNI->def);
+ VNI = SrcQ.valueIn();
+ assert(VNI && "Copy from non-existing value");
+ if (VNI->isPHIDef())
+ return false;
+ MI = LIS.getInstructionFromIndex(VNI->def);
+ assert(MI && "Dead valno in interval");
}
if (!TII.isTriviallyReMaterializable(*MI, LIS.getAliasAnalysis()))
@@ -135,43 +125,55 @@ static bool isRematerializable(const LiveInterval &LI,
return true;
}
-void VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
- float weight = weightCalcHelper(li);
+void VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &LI) {
+ float Weight = weightCalcHelper(LI);
// Check if unspillable.
- if (weight < 0)
+ if (Weight < 0)
return;
- li.weight = weight;
+ LI.setWeight(Weight);
}
-float VirtRegAuxInfo::futureWeight(LiveInterval &li, SlotIndex start,
- SlotIndex end) {
- return weightCalcHelper(li, &start, &end);
+float VirtRegAuxInfo::futureWeight(LiveInterval &LI, SlotIndex Start,
+ SlotIndex End) {
+ return weightCalcHelper(LI, &Start, &End);
}
-float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start,
- SlotIndex *end) {
- MachineRegisterInfo &mri = MF.getRegInfo();
- const TargetRegisterInfo &tri = *MF.getSubtarget().getRegisterInfo();
- MachineBasicBlock *mbb = nullptr;
- MachineLoop *loop = nullptr;
- bool isExiting = false;
- float totalWeight = 0;
- unsigned numInstr = 0; // Number of instructions using li
- SmallPtrSet<MachineInstr*, 8> visited;
-
- std::pair<unsigned, unsigned> TargetHint = mri.getRegAllocationHint(li.reg);
+float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
+ SlotIndex *End) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ MachineBasicBlock *MBB = nullptr;
+ MachineLoop *Loop = nullptr;
+ bool IsExiting = false;
+ float TotalWeight = 0;
+ unsigned NumInstr = 0; // Number of instructions using LI
+ SmallPtrSet<MachineInstr *, 8> Visited;
+
+ std::pair<Register, Register> TargetHint = MRI.getRegAllocationHint(LI.reg());
+
+ if (LI.isSpillable()) {
+ Register Reg = LI.reg();
+ Register Original = VRM.getOriginal(Reg);
+ const LiveInterval &OrigInt = LIS.getInterval(Original);
+    // LI comes from a split of OrigInt. If OrigInt was marked
+ // as not spillable, make sure the new interval is marked
+ // as not spillable as well.
+ if (!OrigInt.isSpillable())
+ LI.markNotSpillable();
+ }
// Don't recompute spill weight for an unspillable register.
- bool Spillable = li.isSpillable();
+ bool IsSpillable = LI.isSpillable();
- bool localSplitArtifact = start && end;
+ bool IsLocalSplitArtifact = Start && End;
// Do not update future local split artifacts.
- bool updateLI = !localSplitArtifact;
+ bool ShouldUpdateLI = !IsLocalSplitArtifact;
- if (localSplitArtifact) {
- MachineBasicBlock *localMBB = LIS.getMBBFromIndex(*end);
- assert(localMBB == LIS.getMBBFromIndex(*start) &&
+ if (IsLocalSplitArtifact) {
+ MachineBasicBlock *localMBB = LIS.getMBBFromIndex(*End);
+ assert(localMBB == LIS.getMBBFromIndex(*Start) &&
"start and end are expected to be in the same basic block");
// Local split artifact will have 2 additional copy instructions and they
@@ -179,116 +181,119 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start,
// localLI = COPY other
// ...
// other = COPY localLI
- totalWeight += LiveIntervals::getSpillWeight(true, false, &MBFI, localMBB);
- totalWeight += LiveIntervals::getSpillWeight(false, true, &MBFI, localMBB);
+ TotalWeight += LiveIntervals::getSpillWeight(true, false, &MBFI, localMBB);
+ TotalWeight += LiveIntervals::getSpillWeight(false, true, &MBFI, localMBB);
- numInstr += 2;
+ NumInstr += 2;
}
// CopyHint is a sortable hint derived from a COPY instruction.
struct CopyHint {
- unsigned Reg;
- float Weight;
- bool IsPhys;
- CopyHint(unsigned R, float W, bool P) :
- Reg(R), Weight(W), IsPhys(P) {}
- bool operator<(const CopyHint &rhs) const {
+ const Register Reg;
+ const float Weight;
+ CopyHint(Register R, float W) : Reg(R), Weight(W) {}
+ bool operator<(const CopyHint &Rhs) const {
// Always prefer any physreg hint.
- if (IsPhys != rhs.IsPhys)
- return (IsPhys && !rhs.IsPhys);
- if (Weight != rhs.Weight)
- return (Weight > rhs.Weight);
- return Reg < rhs.Reg; // Tie-breaker.
+ if (Reg.isPhysical() != Rhs.Reg.isPhysical())
+ return Reg.isPhysical();
+ if (Weight != Rhs.Weight)
+ return (Weight > Rhs.Weight);
+ return Reg.id() < Rhs.Reg.id(); // Tie-breaker.
}
};
- std::set<CopyHint> CopyHints;
+ std::set<CopyHint> CopyHints;
+ DenseMap<unsigned, float> Hint;
for (MachineRegisterInfo::reg_instr_nodbg_iterator
- I = mri.reg_instr_nodbg_begin(li.reg),
- E = mri.reg_instr_nodbg_end();
+ I = MRI.reg_instr_nodbg_begin(LI.reg()),
+ E = MRI.reg_instr_nodbg_end();
I != E;) {
- MachineInstr *mi = &*(I++);
+ MachineInstr *MI = &*(I++);
// For local split artifacts, we are interested only in instructions between
// the expected start and end of the range.
- SlotIndex si = LIS.getInstructionIndex(*mi);
- if (localSplitArtifact && ((si < *start) || (si > *end)))
+ SlotIndex SI = LIS.getInstructionIndex(*MI);
+ if (IsLocalSplitArtifact && ((SI < *Start) || (SI > *End)))
continue;
- numInstr++;
- if (mi->isIdentityCopy() || mi->isImplicitDef())
+ NumInstr++;
+ if (MI->isIdentityCopy() || MI->isImplicitDef())
continue;
- if (!visited.insert(mi).second)
+ if (!Visited.insert(MI).second)
continue;
- float weight = 1.0f;
- if (Spillable) {
+ // For terminators that produce values, ask the backend if the register is
+ // not spillable.
+ if (TII.isUnspillableTerminator(MI) && MI->definesRegister(LI.reg())) {
+ LI.markNotSpillable();
+ return -1.0f;
+ }
+
+ float Weight = 1.0f;
+ if (IsSpillable) {
// Get loop info for mi.
- if (mi->getParent() != mbb) {
- mbb = mi->getParent();
- loop = Loops.getLoopFor(mbb);
- isExiting = loop ? loop->isLoopExiting(mbb) : false;
+ if (MI->getParent() != MBB) {
+ MBB = MI->getParent();
+ Loop = Loops.getLoopFor(MBB);
+ IsExiting = Loop ? Loop->isLoopExiting(MBB) : false;
}
// Calculate instr weight.
- bool reads, writes;
- std::tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg);
- weight = LiveIntervals::getSpillWeight(writes, reads, &MBFI, *mi);
+ bool Reads, Writes;
+ std::tie(Reads, Writes) = MI->readsWritesVirtualRegister(LI.reg());
+ Weight = LiveIntervals::getSpillWeight(Writes, Reads, &MBFI, *MI);
// Give extra weight to what looks like a loop induction variable update.
- if (writes && isExiting && LIS.isLiveOutOfMBB(li, mbb))
- weight *= 3;
+ if (Writes && IsExiting && LIS.isLiveOutOfMBB(LI, MBB))
+ Weight *= 3;
- totalWeight += weight;
+ TotalWeight += Weight;
}
// Get allocation hints from copies.
- if (!mi->isCopy())
+ if (!MI->isCopy())
continue;
- Register hint = copyHint(mi, li.reg, tri, mri);
- if (!hint)
+ Register HintReg = copyHint(MI, LI.reg(), TRI, MRI);
+ if (!HintReg)
continue;
// Force hweight onto the stack so that x86 doesn't add hidden precision,
// making the comparison incorrectly pass (i.e., 1 > 1 == true??).
//
// FIXME: we probably shouldn't use floats at all.
- volatile float hweight = Hint[hint] += weight;
- if (Register::isVirtualRegister(hint) || mri.isAllocatable(hint))
- CopyHints.insert(
- CopyHint(hint, hweight, Register::isPhysicalRegister(hint)));
+ volatile float HWeight = Hint[HintReg] += Weight;
+ if (HintReg.isVirtual() || MRI.isAllocatable(HintReg))
+ CopyHints.insert(CopyHint(HintReg, HWeight));
}
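The volatile store above guards against x87 excess precision on 32-bit x86: without a round-trip through a 32-bit memory slot, a freshly computed sum may be compared at 80-bit precision against a value that was already rounded to float. A sketch of the failure mode (assumes a 32-bit x87 build, e.g. -m32 -mfpmath=387 at -O0):

    #include <cstdio>

    int main() {
      float A = 16777216.0f, B = 0.5f, C = 16777216.0f; // 2^24; float ULP here is 2
      volatile float Sum = A + B; // volatile forces rounding to 32-bit float
      std::printf("%d %d\n",
                  (A + B) > C,    // may print 1: compared at 80-bit precision
                  Sum > C);       // prints 0: 2^24 + 0.5 rounds back to 2^24
    }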
- Hint.clear();
-
// Pass all the sorted copy hints to mri.
- if (updateLI && CopyHints.size()) {
+ if (ShouldUpdateLI && CopyHints.size()) {
// Remove a generic hint if previously added by target.
if (TargetHint.first == 0 && TargetHint.second)
- mri.clearSimpleHint(li.reg);
+ MRI.clearSimpleHint(LI.reg());
- std::set<unsigned> HintedRegs;
+ std::set<Register> HintedRegs;
for (auto &Hint : CopyHints) {
if (!HintedRegs.insert(Hint.Reg).second ||
(TargetHint.first != 0 && Hint.Reg == TargetHint.second))
// Don't add the same reg twice or the target-type hint again.
continue;
- mri.addRegAllocationHint(li.reg, Hint.Reg);
+ MRI.addRegAllocationHint(LI.reg(), Hint.Reg);
}
// Weakly boost the spill weight of hinted registers.
- totalWeight *= 1.01F;
+ TotalWeight *= 1.01F;
}
// If the live interval was already unspillable, leave it that way.
- if (!Spillable)
+ if (!IsSpillable)
return -1.0;
// Mark li as unspillable if all live ranges are tiny and the interval
// is not live at any reg mask. If the interval is live at a reg mask
// spilling may be required.
- if (updateLI && li.isZeroLength(LIS.getSlotIndexes()) &&
- !li.isLiveAtIndexes(LIS.getRegMaskSlots())) {
- li.markNotSpillable();
+ if (ShouldUpdateLI && LI.isZeroLength(LIS.getSlotIndexes()) &&
+ !LI.isLiveAtIndexes(LIS.getRegMaskSlots())) {
+ LI.markNotSpillable();
return -1.0;
}
@@ -296,10 +301,10 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start,
// it is a preferred candidate for spilling.
// FIXME: this gets much more complicated once we support non-trivial
// re-materialization.
- if (isRematerializable(li, LIS, VRM, *MF.getSubtarget().getInstrInfo()))
- totalWeight *= 0.5F;
+ if (isRematerializable(LI, LIS, VRM, *MF.getSubtarget().getInstrInfo()))
+ TotalWeight *= 0.5F;
- if (localSplitArtifact)
- return normalize(totalWeight, start->distance(*end), numInstr);
- return normalize(totalWeight, li.getSize(), numInstr);
+ if (IsLocalSplitArtifact)
+ return normalize(TotalWeight, Start->distance(*End), NumInstr);
+ return normalize(TotalWeight, LI.getSize(), NumInstr);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp
index 3d8c2c8b00aa..c9246f6e8754 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -13,6 +13,7 @@
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -62,6 +63,11 @@ void CCState::MarkAllocated(MCPhysReg Reg) {
UsedRegs[*AI / 32] |= 1 << (*AI & 31);
}
+void CCState::MarkUnallocated(MCPhysReg Reg) {
+ for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
+ UsedRegs[*AI / 32] &= ~(1 << (*AI & 31));
+}
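MarkUnallocated is the exact inverse of MarkAllocated directly above it: UsedRegs is a flat bit vector packed into 32-bit words, indexed by word Reg / 32 and bit Reg & 31, and the alias iterator applies the update to every register that overlaps Reg. The packing trick in isolation:

    #include <cstdint>
    #include <vector>

    // One bit per register unit, packed into 32-bit words.
    void markAllocated(std::vector<uint32_t> &Bits, unsigned Reg) {
      Bits[Reg / 32] |= 1u << (Reg & 31);    // set the bit
    }
    void markUnallocated(std::vector<uint32_t> &Bits, unsigned Reg) {
      Bits[Reg / 32] &= ~(1u << (Reg & 31)); // clear the bit
    }
    // Example: register 35 lives in word 1 (35 / 32) at bit 3 (35 & 31).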
+
bool CCState::IsShadowAllocatedReg(MCRegister Reg) const {
if (!isAllocated(Reg))
return false;
@@ -184,14 +190,17 @@ void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) {
}
}
+void CCState::ensureMaxAlignment(Align Alignment) {
+ if (!AnalyzingMustTailForwardedRegs)
+ MF.getFrameInfo().ensureMaxAlignment(Alignment);
+}
+
static bool isValueTypeInRegForCC(CallingConv::ID CC, MVT VT) {
if (VT.isVector())
return true; // Assume -msse-regparm might be in effect.
if (!VT.isInteger())
return false;
- if (CC == CallingConv::X86_VectorCall || CC == CallingConv::X86_FastCall)
- return true;
- return false;
+ return (CC == CallingConv::X86_VectorCall || CC == CallingConv::X86_FastCall);
}
void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
@@ -207,8 +216,8 @@ void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
// Allocate something of this value type repeatedly until we get assigned a
// location in memory.
- bool HaveRegParm = true;
- while (HaveRegParm) {
+ bool HaveRegParm;
+ do {
if (Fn(0, VT, VT, CCValAssign::Full, Flags, *this)) {
#ifndef NDEBUG
dbgs() << "Call has unhandled type " << EVT(VT).getEVTString()
@@ -217,7 +226,7 @@ void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
llvm_unreachable(nullptr);
}
HaveRegParm = Locs.back().isRegLoc();
- }
+ } while (HaveRegParm);
// Copy all the registers from the value locations we added.
assert(NumLocs < Locs.size() && "CC assignment failed to add location");
@@ -248,7 +257,7 @@ void CCState::analyzeMustTailForwardedRegisters(
const TargetLowering *TL = MF.getSubtarget().getTargetLowering();
const TargetRegisterClass *RC = TL->getRegClassFor(RegVT);
for (MCPhysReg PReg : RemainingRegs) {
- unsigned VReg = MF.addLiveIn(PReg, RC);
+ Register VReg = MF.addLiveIn(PReg, RC);
Forwards.push_back(ForwardedRegister(VReg, PReg, RegVT));
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
index 7a8c022c82da..d2400d0371e3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
@@ -20,16 +20,17 @@ using namespace llvm;
/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeAtomicExpandPass(Registry);
- initializeBBSectionsPreparePass(Registry);
+ initializeBasicBlockSectionsPass(Registry);
initializeBranchFolderPassPass(Registry);
initializeBranchRelaxationPass(Registry);
initializeCFGuardLongjmpPass(Registry);
initializeCFIInstrInserterPass(Registry);
+ initializeCheckDebugMachineModulePass(Registry);
initializeCodeGenPreparePass(Registry);
initializeDeadMachineInstructionElimPass(Registry);
initializeDebugifyMachineModulePass(Registry);
initializeDetectDeadLanesPass(Registry);
- initializeDwarfEHPreparePass(Registry);
+ initializeDwarfEHPrepareLegacyPassPass(Registry);
initializeEarlyIfConverterPass(Registry);
initializeEarlyIfPredicatorPass(Registry);
initializeEarlyMachineLICMPass(Registry);
@@ -98,7 +99,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeRegisterCoalescerPass(Registry);
initializeRenameIndependentSubregsPass(Registry);
initializeSafeStackLegacyPassPass(Registry);
- initializeScalarizeMaskedMemIntrinPass(Registry);
initializeShrinkWrapPass(Registry);
initializeSjLjEHPreparePass(Registry);
initializeSlotIndexesPass(Registry);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPassBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPassBuilder.cpp
new file mode 100644
index 000000000000..7f37f2069a3b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPassBuilder.cpp
@@ -0,0 +1,25 @@
+//===--- CodeGenPassBuilder.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines interfaces to access the target independent code
+// generation passes provided by the LLVM backend.
+//
+//===---------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/CodeGenPassBuilder.h"
+
+using namespace llvm;
+
+namespace llvm {
+#define DUMMY_MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+ AnalysisKey PASS_NAME::Key;
+#include "llvm/CodeGen/MachinePassRegistry.def"
+#define DUMMY_MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \
+ AnalysisKey PASS_NAME::Key;
+#include "llvm/CodeGen/MachinePassRegistry.def"
+} // namespace llvm
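The two #include lines above stamp out one AnalysisKey definition per entry in MachinePassRegistry.def by redefining the expansion macro before each inclusion (the X-macro pattern). A generic, self-contained sketch of the technique, with hypothetical pass names:

    // A .def-style list is just a macro invoked once per entry.
    #define MY_PASS_LIST(X)             \
      X("dead-mi-elim", DeadMIElimPass) \
      X("if-convert", IfConvertPass)

    #define DECLARE_PASS(NAME, CLASS) struct CLASS { static int Key; };
    MY_PASS_LIST(DECLARE_PASS) // declares DeadMIElimPass and IfConvertPass
    #undef DECLARE_PASS

    #define DEFINE_KEY(NAME, CLASS) int CLASS::Key;
    MY_PASS_LIST(DEFINE_KEY)   // defines each static Key, as this file does
    #undef DEFINE_KEY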
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
index e8b8e6c93cf0..b2bc75c19709 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -376,6 +376,7 @@ class TypePromotionTransaction;
return *DT;
}
+ void removeAllAssertingVHReferences(Value *V);
bool eliminateFallThrough(Function &F);
bool eliminateMostlyEmptyBlocks(Function &F);
BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
@@ -383,6 +384,7 @@ class TypePromotionTransaction;
void eliminateMostlyEmptyBlock(BasicBlock *BB);
bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
bool isPreheader);
+ bool makeBitReverse(Instruction &I);
bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
bool optimizeInst(Instruction *I, bool &ModifiedDT);
bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
@@ -437,7 +439,11 @@ char CodeGenPrepare::ID = 0;
INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE,
"Optimize for code generation", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE,
"Optimize for code generation", false, false)
@@ -466,13 +472,21 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
OptSize = F.hasOptSize();
if (ProfileGuidedSectionPrefix) {
- if (PSI->isFunctionHotInCallGraph(&F, *BFI))
- F.setSectionPrefix(".hot");
- else if (PSI->isFunctionColdInCallGraph(&F, *BFI))
- F.setSectionPrefix(".unlikely");
+ // The hot attribute overrides profile-count-based hotness, while
+ // profile-count-based hotness overrides the cold attribute.
+ // This is conservative behavior.
+ if (F.hasFnAttribute(Attribute::Hot) ||
+ PSI->isFunctionHotInCallGraph(&F, *BFI))
+ F.setSectionPrefix("hot");
+ // If PSI shows this function is not hot, we place the function into the
+ // unlikely section if (1) PSI shows this is a cold function, or
+ // (2) the function has the cold attribute.
+ else if (PSI->isFunctionColdInCallGraph(&F, *BFI) ||
+ F.hasFnAttribute(Attribute::Cold))
+ F.setSectionPrefix("unlikely");
else if (ProfileUnknownInSpecialSection && PSI->hasPartialSampleProfile() &&
PSI->isFunctionHotnessUnknown(F))
- F.setSectionPrefix(".unknown");
+ F.setSectionPrefix("unknown");
}
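Condensed, the precedence implemented above is: the hot attribute or PSI-measured hotness wins first, then PSI-measured coldness or the cold attribute, then the partial-profile "unknown" bucket. A sketch restating the branch as a standalone function (not the real API):

    enum class Prefix { Hot, Unlikely, Unknown, None };

    Prefix pickSectionPrefix(bool HotAttr, bool PSIHot, bool PSICold,
                             bool ColdAttr, bool PartialProfile,
                             bool HotnessUnknown) {
      if (HotAttr || PSIHot)   // the hot attribute beats cold profile data
        return Prefix::Hot;
      if (PSICold || ColdAttr) // only reached if nothing said "hot"
        return Prefix::Unlikely;
      if (PartialProfile && HotnessUnknown)
        return Prefix::Unknown;
      return Prefix::None;
    }
    // e.g. a function marked hot but measured cold still gets "hot":
    // pickSectionPrefix(true, false, true, false, false, false) == Prefix::Hot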
/// This optimization identifies DIV instructions that can be
@@ -538,6 +552,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
LargeOffsetGEPID.clear();
}
+ NewGEPBases.clear();
SunkAddrs.clear();
if (!DisableBranchOpts) {
@@ -547,13 +562,13 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// are removed.
SmallSetVector<BasicBlock*, 8> WorkList;
for (BasicBlock &BB : F) {
- SmallVector<BasicBlock *, 2> Successors(succ_begin(&BB), succ_end(&BB));
+ SmallVector<BasicBlock *, 2> Successors(successors(&BB));
MadeChange |= ConstantFoldTerminator(&BB, true);
if (!MadeChange) continue;
for (SmallVectorImpl<BasicBlock*>::iterator
II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
- if (pred_begin(*II) == pred_end(*II))
+ if (pred_empty(*II))
WorkList.insert(*II);
}
@@ -561,13 +576,13 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
MadeChange |= !WorkList.empty();
while (!WorkList.empty()) {
BasicBlock *BB = WorkList.pop_back_val();
- SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
+ SmallVector<BasicBlock*, 2> Successors(successors(BB));
DeleteDeadBlock(BB);
for (SmallVectorImpl<BasicBlock*>::iterator
II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
- if (pred_begin(*II) == pred_end(*II))
+ if (pred_empty(*II))
WorkList.insert(*II);
}
@@ -601,6 +616,33 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
return EverMadeChange;
}
+/// An instruction is about to be deleted, so remove all references to it in our
+/// GEP-tracking data structures.
+void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
+ LargeOffsetGEPMap.erase(V);
+ NewGEPBases.erase(V);
+
+ auto GEP = dyn_cast<GetElementPtrInst>(V);
+ if (!GEP)
+ return;
+
+ LargeOffsetGEPID.erase(GEP);
+
+ auto VecI = LargeOffsetGEPMap.find(GEP->getPointerOperand());
+ if (VecI == LargeOffsetGEPMap.end())
+ return;
+
+ auto &GEPVector = VecI->second;
+ const auto &I =
+ llvm::find_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; });
+ if (I == GEPVector.end())
+ return;
+
+ GEPVector.erase(I);
+ if (GEPVector.empty())
+ LargeOffsetGEPMap.erase(VecI);
+}
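This callback exists because LargeOffsetGEPMap and the related containers hold values through AssertingVH handles: in an asserts-enabled build, destroying a Value while an AssertingVH still points at it aborts, so every deletion path in this pass must scrub the maps first. A minimal sketch of the failure mode:

    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/ValueHandle.h"

    void sketch(llvm::Instruction *I) {
      llvm::AssertingVH<llvm::Instruction> Handle(I);
      // I->eraseFromParent(); // would assert here: Handle still tracks I
      Handle = nullptr;        // drop the handle first...
      I->eraseFromParent();    // ...then deletion is safe
    }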
+
// Verify BFI has been updated correctly by recomputing BFI and comparing them.
void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) {
DominatorTree NewDT(F);
@@ -619,9 +661,10 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F) {
// Use a temporary array to avoid iterator being invalidated when
// deleting blocks.
SmallVector<WeakTrackingVH, 16> Blocks;
- for (auto &Block : llvm::make_range(std::next(F.begin()), F.end()))
+ for (auto &Block : llvm::drop_begin(F))
Blocks.push_back(&Block);
+ SmallSet<WeakTrackingVH, 16> Preds;
for (auto &Block : Blocks) {
auto *BB = cast_or_null<BasicBlock>(Block);
if (!BB)
@@ -640,8 +683,16 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F) {
// Merge BB into SinglePred and delete it.
MergeBlockIntoPredecessor(BB);
+ Preds.insert(SinglePred);
}
}
+
+ // (Repeatedly) merging blocks into their predecessors can create redundant
+ // debug intrinsics.
+ for (auto &Pred : Preds)
+ if (auto *BB = cast_or_null<BasicBlock>(Pred))
+ RemoveRedundantDbgInstrs(BB);
+
return Changed;
}
@@ -686,7 +737,7 @@ bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
while (!LoopList.empty()) {
Loop *L = LoopList.pop_back_val();
- LoopList.insert(LoopList.end(), L->begin(), L->end());
+ llvm::append_range(LoopList, *L);
if (BasicBlock *Preheader = L->getLoopPreheader())
Preheaders.insert(Preheader);
}
@@ -696,7 +747,7 @@ bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
// as we remove them.
// Note that this intentionally skips the entry block.
SmallVector<WeakTrackingVH, 16> Blocks;
- for (auto &Block : llvm::make_range(std::next(F.begin()), F.end()))
+ for (auto &Block : llvm::drop_begin(F))
Blocks.push_back(&Block);
for (auto &Block : Blocks) {
@@ -2011,7 +2062,14 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::assume: {
+ Value *Operand = II->getOperand(0);
II->eraseFromParent();
+ // Prune the operand, it's most likely dead.
+ resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
+ RecursivelyDeleteTriviallyDeadInstructions(
+ Operand, TLInfo, nullptr,
+ [&](Value *V) { removeAllAssertingVHReferences(V); });
+ });
return true;
}
@@ -2172,8 +2230,7 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT
EVI = dyn_cast<ExtractValueInst>(V);
if (EVI) {
V = EVI->getOperand(0);
- if (!std::all_of(EVI->idx_begin(), EVI->idx_end(),
- [](unsigned idx) { return idx == 0; }))
+ if (!llvm::all_of(EVI->indices(), [](unsigned idx) { return idx == 0; }))
return false;
}
@@ -2192,13 +2249,12 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT
// Skip over debug and the bitcast.
do {
++BI;
- } while (isa<DbgInfoIntrinsic>(BI) || &*BI == BCI || &*BI == EVI);
+ } while (isa<DbgInfoIntrinsic>(BI) || &*BI == BCI || &*BI == EVI ||
+ isa<PseudoProbeInst>(BI));
if (&*BI != RetI)
return false;
} else {
- BasicBlock::iterator BI = BB->begin();
- while (isa<DbgInfoIntrinsic>(BI)) ++BI;
- if (&*BI != RetI)
+ if (BB->getFirstNonPHIOrDbg(true) != RetI)
return false;
}
@@ -2223,18 +2279,12 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT
for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
if (!VisitedBBs.insert(*PI).second)
continue;
-
- BasicBlock::InstListType &InstList = (*PI)->getInstList();
- BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
- BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
- do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
- if (RI == RE)
- continue;
-
- CallInst *CI = dyn_cast<CallInst>(&*RI);
- if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
- attributesPermitTailCall(F, CI, RetI, *TLI))
- TailCallBBs.push_back(*PI);
+ if (Instruction *I = (*PI)->rbegin()->getPrevNonDebugInstruction(true)) {
+ CallInst *CI = dyn_cast<CallInst>(I);
+ if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
+ attributesPermitTailCall(F, CI, RetI, *TLI))
+ TailCallBBs.push_back(*PI);
+ }
}
}
@@ -2258,7 +2308,7 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT
}
// If we eliminated all predecessors of the block, delete the block now.
- if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
+ if (Changed && !BB->hasAddressTaken() && pred_empty(BB))
BB->eraseFromParent();
return Changed;
@@ -3109,9 +3159,7 @@ public:
/// \returns whether the element is actually removed, i.e. was in the
/// collection before the operation.
bool erase(PHINode *Ptr) {
- auto it = NodeMap.find(Ptr);
- if (it != NodeMap.end()) {
- NodeMap.erase(Ptr);
+ if (NodeMap.erase(Ptr)) {
SkipRemovedElements(FirstValidElement);
return true;
}
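The shortened erase() works because DenseMap::erase(Key), like the standard containers' erase-by-key, already reports whether anything was removed, so the preceding find() was redundant. The same idiom with std::map:

    #include <map>

    bool eraseAndReport(std::map<int, int> &M, int Key) {
      return M.erase(Key) != 0; // erase-by-key reports whether a mapping
    }                           // was removed; no separate find() needed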
@@ -3666,8 +3714,7 @@ private:
PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi);
Map[Current] = PHI;
ST.insertNewPhi(PHI);
- for (Value *P : CurrentPhi->incoming_values())
- Worklist.push_back(P);
+ append_range(Worklist, CurrentPhi->incoming_values());
}
}
}
@@ -4289,7 +4336,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
unsigned SrcAS
= AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
- if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
+ if (TLI.getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS))
return matchAddr(AddrInst->getOperand(0), Depth);
return false;
}
@@ -4921,8 +4968,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// For a PHI node, push all of its incoming values.
if (PHINode *P = dyn_cast<PHINode>(V)) {
- for (Value *IncValue : P->incoming_values())
- worklist.push_back(IncValue);
+ append_range(worklist, P->incoming_values());
PhiOrSelectSeen = true;
continue;
}
@@ -5236,20 +5282,11 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// If we have no uses, recursively delete the value and all dead instructions
// using it.
if (Repl->use_empty()) {
- // This can cause recursive deletion, which can invalidate our iterator.
- // Use a WeakTrackingVH to hold onto it in case this happens.
- Value *CurValue = &*CurInstIterator;
- WeakTrackingVH IterHandle(CurValue);
- BasicBlock *BB = CurInstIterator->getParent();
-
- RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
-
- if (IterHandle != CurValue) {
- // If the iterator instruction was recursively deleted, start over at the
- // start of the block.
- CurInstIterator = BB->begin();
- SunkAddrs.clear();
- }
+ resetIteratorIfInvalidatedWhileCalling(CurInstIterator->getParent(), [&]() {
+ RecursivelyDeleteTriviallyDeadInstructions(
+ Repl, TLInfo, nullptr,
+ [&](Value *V) { removeAllAssertingVHReferences(V); });
+ });
}
++NumMemoryInsts;
return true;
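The WeakTrackingVH dance deleted above is now wrapped in resetIteratorIfInvalidatedWhileCalling. A sketch of the helper's likely shape, inferred from the removed lines rather than copied from the actual definition:

    // Run a callback that may recursively delete instructions; if the
    // instruction under the current iterator dies, restart the block scan.
    void CodeGenPrepare::resetIteratorIfInvalidatedWhileCalling(
        BasicBlock *BB, std::function<void()> F) {
      Value *CurValue = &*CurInstIterator;
      WeakTrackingVH IterHandle(CurValue); // nulled out if CurValue is deleted
      F();
      if (IterHandle != CurValue) {
        CurInstIterator = BB->begin(); // iterator was invalidated; start over
        SunkAddrs.clear();
      }
    }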
@@ -5270,92 +5307,112 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
///
/// If the final index isn't a vector or is a splat, we can emit a scalar GEP
/// followed by a GEP with an all zeroes vector index. This will enable
-/// SelectionDAGBuilder to use a the scalar GEP as the uniform base and have a
+/// SelectionDAGBuilder to use the scalar GEP as the uniform base and have a
/// zero index.
bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
Value *Ptr) {
- const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
- if (!GEP || !GEP->hasIndices())
- return false;
+ Value *NewAddr;
- // If the GEP and the gather/scatter aren't in the same BB, don't optimize.
- // FIXME: We should support this by sinking the GEP.
- if (MemoryInst->getParent() != GEP->getParent())
- return false;
+ if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
+ // Don't optimize GEPs that don't have indices.
+ if (!GEP->hasIndices())
+ return false;
- SmallVector<Value *, 2> Ops(GEP->op_begin(), GEP->op_end());
+ // If the GEP and the gather/scatter aren't in the same BB, don't optimize.
+ // FIXME: We should support this by sinking the GEP.
+ if (MemoryInst->getParent() != GEP->getParent())
+ return false;
- bool RewriteGEP = false;
+ SmallVector<Value *, 2> Ops(GEP->operands());
- if (Ops[0]->getType()->isVectorTy()) {
- Ops[0] = const_cast<Value *>(getSplatValue(Ops[0]));
- if (!Ops[0])
- return false;
- RewriteGEP = true;
- }
+ bool RewriteGEP = false;
- unsigned FinalIndex = Ops.size() - 1;
+ if (Ops[0]->getType()->isVectorTy()) {
+ Ops[0] = getSplatValue(Ops[0]);
+ if (!Ops[0])
+ return false;
+ RewriteGEP = true;
+ }
- // Ensure all but the last index is 0.
- // FIXME: This isn't strictly required. All that's required is that they are
- // all scalars or splats.
- for (unsigned i = 1; i < FinalIndex; ++i) {
- auto *C = dyn_cast<Constant>(Ops[i]);
- if (!C)
- return false;
- if (isa<VectorType>(C->getType()))
- C = C->getSplatValue();
- auto *CI = dyn_cast_or_null<ConstantInt>(C);
- if (!CI || !CI->isZero())
- return false;
- // Scalarize the index if needed.
- Ops[i] = CI;
- }
-
- // Try to scalarize the final index.
- if (Ops[FinalIndex]->getType()->isVectorTy()) {
- if (Value *V = const_cast<Value *>(getSplatValue(Ops[FinalIndex]))) {
- auto *C = dyn_cast<ConstantInt>(V);
- // Don't scalarize all zeros vector.
- if (!C || !C->isZero()) {
- Ops[FinalIndex] = V;
- RewriteGEP = true;
+ unsigned FinalIndex = Ops.size() - 1;
+
+ // Ensure all but the last index is 0.
+ // FIXME: This isn't strictly required. All that's required is that they are
+ // all scalars or splats.
+ for (unsigned i = 1; i < FinalIndex; ++i) {
+ auto *C = dyn_cast<Constant>(Ops[i]);
+ if (!C)
+ return false;
+ if (isa<VectorType>(C->getType()))
+ C = C->getSplatValue();
+ auto *CI = dyn_cast_or_null<ConstantInt>(C);
+ if (!CI || !CI->isZero())
+ return false;
+ // Scalarize the index if needed.
+ Ops[i] = CI;
+ }
+
+ // Try to scalarize the final index.
+ if (Ops[FinalIndex]->getType()->isVectorTy()) {
+ if (Value *V = getSplatValue(Ops[FinalIndex])) {
+ auto *C = dyn_cast<ConstantInt>(V);
+ // Don't scalarize all zeros vector.
+ if (!C || !C->isZero()) {
+ Ops[FinalIndex] = V;
+ RewriteGEP = true;
+ }
}
}
- }
- // If we made any changes or the we have extra operands, we need to generate
- // new instructions.
- if (!RewriteGEP && Ops.size() == 2)
- return false;
-
- unsigned NumElts = cast<FixedVectorType>(Ptr->getType())->getNumElements();
+ // If we made any changes or we have extra operands, we need to generate
+ // new instructions.
+ if (!RewriteGEP && Ops.size() == 2)
+ return false;
- IRBuilder<> Builder(MemoryInst);
+ auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
- Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType());
+ IRBuilder<> Builder(MemoryInst);
- Value *NewAddr;
+ Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType());
- // If the final index isn't a vector, emit a scalar GEP containing all ops
- // and a vector GEP with all zeroes final index.
- if (!Ops[FinalIndex]->getType()->isVectorTy()) {
- NewAddr = Builder.CreateGEP(Ops[0], makeArrayRef(Ops).drop_front());
- auto *IndexTy = FixedVectorType::get(ScalarIndexTy, NumElts);
- NewAddr = Builder.CreateGEP(NewAddr, Constant::getNullValue(IndexTy));
- } else {
- Value *Base = Ops[0];
- Value *Index = Ops[FinalIndex];
+ // If the final index isn't a vector, emit a scalar GEP containing all ops
+ // and a vector GEP with all zeroes final index.
+ if (!Ops[FinalIndex]->getType()->isVectorTy()) {
+ NewAddr = Builder.CreateGEP(Ops[0], makeArrayRef(Ops).drop_front());
+ auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
+ NewAddr = Builder.CreateGEP(NewAddr, Constant::getNullValue(IndexTy));
+ } else {
+ Value *Base = Ops[0];
+ Value *Index = Ops[FinalIndex];
+
+ // Create a scalar GEP if there are more than 2 operands.
+ if (Ops.size() != 2) {
+ // Replace the last index with 0.
+ Ops[FinalIndex] = Constant::getNullValue(ScalarIndexTy);
+ Base = Builder.CreateGEP(Base, makeArrayRef(Ops).drop_front());
+ }
- // Create a scalar GEP if there are more than 2 operands.
- if (Ops.size() != 2) {
- // Replace the last index with 0.
- Ops[FinalIndex] = Constant::getNullValue(ScalarIndexTy);
- Base = Builder.CreateGEP(Base, makeArrayRef(Ops).drop_front());
+ // Now create the GEP with scalar pointer and vector index.
+ NewAddr = Builder.CreateGEP(Base, Index);
}
+ } else if (!isa<Constant>(Ptr)) {
+ // Not a GEP; maybe it's a splat and we can create a GEP to enable
+ // SelectionDAGBuilder to use it as a uniform base.
+ Value *V = getSplatValue(Ptr);
+ if (!V)
+ return false;
+
+ auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
- // Now create the GEP with scalar pointer and vector index.
- NewAddr = Builder.CreateGEP(Base, Index);
+ IRBuilder<> Builder(MemoryInst);
+
+ // Emit a vector GEP with a scalar pointer and all 0s vector index.
+ Type *ScalarIndexTy = DL->getIndexType(V->getType()->getScalarType());
+ auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
+ NewAddr = Builder.CreateGEP(V, Constant::getNullValue(IndexTy));
+ } else {
+ // Constant; SelectionDAGBuilder knows to check if it's a splat.
+ return false;
}
MemoryInst->replaceUsesOfWith(Ptr, NewAddr);
@@ -5363,7 +5420,9 @@ bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
// If we have no uses, recursively delete the value and all dead instructions
// using it.
if (Ptr->use_empty())
- RecursivelyDeleteTriviallyDeadInstructions(Ptr, TLInfo);
+ RecursivelyDeleteTriviallyDeadInstructions(
+ Ptr, TLInfo, nullptr,
+ [&](Value *V) { removeAllAssertingVHReferences(V); });
return true;
}
@@ -5752,6 +5811,12 @@ bool CodeGenPrepare::optimizePhiType(
Visited.insert(I);
SmallPtrSet<Instruction *, 4> Defs;
SmallPtrSet<Instruction *, 4> Uses;
+ // This works by adding extra bitcasts between load/stores and removing
+ // existing bitcasts. If we have a phi(bitcast(load)) or a store(bitcast(phi)),
+ // we can get into a situation where we remove a bitcast in one iteration
+ // just to add it again in the next. We need to ensure that at least one
+ // bitcast we remove is anchored to something that will not change back.
+ bool AnyAnchored = false;
while (!Worklist.empty()) {
Instruction *II = Worklist.pop_back_val();
@@ -5768,6 +5833,8 @@ bool CodeGenPrepare::optimizePhiType(
Worklist.push_back(OpPhi);
}
} else if (auto *OpLoad = dyn_cast<LoadInst>(V)) {
+ if (!OpLoad->isSimple())
+ return false;
if (!Defs.count(OpLoad)) {
Defs.insert(OpLoad);
Worklist.push_back(OpLoad);
@@ -5785,9 +5852,12 @@ bool CodeGenPrepare::optimizePhiType(
if (!Defs.count(OpBC)) {
Defs.insert(OpBC);
Worklist.push_back(OpBC);
+ AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) &&
+ !isa<ExtractElementInst>(OpBC->getOperand(0));
}
- } else if (!isa<UndefValue>(V))
+ } else if (!isa<UndefValue>(V)) {
return false;
+ }
}
}
@@ -5802,7 +5872,7 @@ bool CodeGenPrepare::optimizePhiType(
Worklist.push_back(OpPhi);
}
} else if (auto *OpStore = dyn_cast<StoreInst>(V)) {
- if (OpStore->getOperand(0) != II)
+ if (!OpStore->isSimple() || OpStore->getOperand(0) != II)
return false;
Uses.insert(OpStore);
} else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
@@ -5811,12 +5881,15 @@ bool CodeGenPrepare::optimizePhiType(
if (OpBC->getType() != ConvertTy)
return false;
Uses.insert(OpBC);
- } else
+ AnyAnchored |=
+ any_of(OpBC->users(), [](User *U) { return !isa<StoreInst>(U); });
+ } else {
return false;
+ }
}
}
- if (!ConvertTy || !TLI->shouldConvertPhiType(PhiTy, ConvertTy))
+ if (!ConvertTy || !AnyAnchored || !TLI->shouldConvertPhiType(PhiTy, ConvertTy))
return false;
LLVM_DEBUG(dbgs() << "Converting " << *I << "\n and connected nodes to "
@@ -5827,11 +5900,13 @@ bool CodeGenPrepare::optimizePhiType(
ValueToValueMap ValMap;
ValMap[UndefValue::get(PhiTy)] = UndefValue::get(ConvertTy);
for (Instruction *D : Defs) {
- if (isa<BitCastInst>(D))
+ if (isa<BitCastInst>(D)) {
ValMap[D] = D->getOperand(0);
- else
+ DeletedInstrs.insert(D);
+ } else {
ValMap[D] =
new BitCastInst(D, ConvertTy, D->getName() + ".bc", D->getNextNode());
+ }
}
for (PHINode *Phi : PhiNodes)
ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(),
@@ -5842,15 +5917,17 @@ bool CodeGenPrepare::optimizePhiType(
for (int i = 0, e = Phi->getNumIncomingValues(); i < e; i++)
NewPhi->addIncoming(ValMap[Phi->getIncomingValue(i)],
Phi->getIncomingBlock(i));
+ Visited.insert(NewPhi);
}
// And finally pipe up the stores and bitcasts
for (Instruction *U : Uses) {
if (isa<BitCastInst>(U)) {
DeletedInstrs.insert(U);
U->replaceAllUsesWith(ValMap[U->getOperand(0)]);
- } else
+ } else {
U->setOperand(0,
new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc", U));
+ }
}
// Save the removed phis to be deleted later.
@@ -6445,9 +6522,7 @@ bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) {
/// If we have a SelectInst that will likely profit from branch prediction,
/// turn it into a branch.
bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
- // If branch conversion isn't desirable, exit early.
- if (DisableSelectToBranch || OptSize ||
- llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get()))
+ if (DisableSelectToBranch)
return false;
// Find all consecutive select instructions that share the same condition.
@@ -6483,7 +6558,8 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
SelectKind = TargetLowering::ScalarValSelect;
if (TLI->isSelectSupported(SelectKind) &&
- !isFormingBranchFromSelectProfitable(TTI, TLI, SI))
+ (!isFormingBranchFromSelectProfitable(TTI, TLI, SI) || OptSize ||
+ llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get())))
return false;
// The DominatorTree needs to be rebuilt by any consumers after this
@@ -6621,6 +6697,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
/// in MVE takes a GPR (integer) register, and instructions that incorporate
/// a VDUP (such as a VADD qd, qm, rm) also require a GPR register.
bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
+ // Accept shuf(insertelem(undef/poison, val, 0), undef/poison, <0,0,..>) only
if (!match(SVI, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
m_Undef(), m_ZeroMask())))
return false;
@@ -6640,14 +6717,12 @@ bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
Builder.SetInsertPoint(SVI);
Value *BC1 = Builder.CreateBitCast(
cast<Instruction>(SVI->getOperand(0))->getOperand(1), NewType);
- Value *Insert = Builder.CreateInsertElement(UndefValue::get(NewVecType), BC1,
- (uint64_t)0);
- Value *Shuffle = Builder.CreateShuffleVector(
- Insert, UndefValue::get(NewVecType), SVI->getShuffleMask());
+ Value *Shuffle = Builder.CreateVectorSplat(NewVecType->getNumElements(), BC1);
Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType);
SVI->replaceAllUsesWith(BC2);
- RecursivelyDeleteTriviallyDeadInstructions(SVI);
+ RecursivelyDeleteTriviallyDeadInstructions(
+ SVI, TLInfo, nullptr, [&](Value *V) { removeAllAssertingVHReferences(V); });
// Also hoist the bitcast up to its operand if they are not in the same
// block.
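CreateVectorSplat is shorthand for exactly the insertelement-plus-shufflevector pair the deleted lines built by hand, so the emitted IR is equivalent. Side by side (Builder, Scalar, VecTy, NumElts and ZeroMask are illustrative names, not from this file):

    // Hand-rolled splat, as the removed code did it:
    Value *Ins = Builder.CreateInsertElement(UndefValue::get(VecTy), Scalar,
                                             (uint64_t)0);
    Value *SplatA = Builder.CreateShuffleVector(Ins, UndefValue::get(VecTy),
                                                ZeroMask);
    // One-call equivalent, as the code does now:
    Value *SplatB = Builder.CreateVectorSplat(NumElts, Scalar);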
@@ -6920,10 +6995,10 @@ class VectorPromoteHelper {
if (UseSplat)
return ConstantVector::getSplat(EC, Val);
- if (!EC.Scalable) {
+ if (!EC.isScalable()) {
SmallVector<Constant *, 4> ConstVec;
UndefValue *UndefVal = UndefValue::get(Val->getType());
- for (unsigned Idx = 0; Idx != EC.Min; ++Idx) {
+ for (unsigned Idx = 0; Idx != EC.getKnownMinValue(); ++Idx) {
if (Idx == ExtractIdx)
ConstVec.push_back(Val);
else
@@ -7604,11 +7679,10 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
/// Given an OR instruction, check to see if this is a bitreverse
/// idiom. If so, insert the new intrinsic and return true.
-static bool makeBitReverse(Instruction &I, const DataLayout &DL,
- const TargetLowering &TLI) {
+bool CodeGenPrepare::makeBitReverse(Instruction &I) {
if (!I.getType()->isIntegerTy() ||
- !TLI.isOperationLegalOrCustom(ISD::BITREVERSE,
- TLI.getValueType(DL, I.getType(), true)))
+ !TLI->isOperationLegalOrCustom(ISD::BITREVERSE,
+ TLI->getValueType(*DL, I.getType(), true)))
return false;
SmallVector<Instruction*, 4> Insts;
@@ -7616,7 +7690,8 @@ static bool makeBitReverse(Instruction &I, const DataLayout &DL,
return false;
Instruction *LastInst = Insts.back();
I.replaceAllUsesWith(LastInst);
- RecursivelyDeleteTriviallyDeadInstructions(&I);
+ RecursivelyDeleteTriviallyDeadInstructions(
+ &I, TLInfo, nullptr, [&](Value *V) { removeAllAssertingVHReferences(V); });
return true;
}
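For context, the OR-tree that recognizeBSwapOrBitReverseIdiom collapses into a single llvm.bitreverse call is typically the classic SWAR reversal as written in source code; whether the matcher fires depends on the exact IR after earlier passes, and on the target making ISD::BITREVERSE legal or custom:

    #include <cstdint>

    // Each step swaps progressively wider bit groups; the whole chain of
    // shifts, masks and ORs can become one llvm.bitreverse.i32.
    uint32_t reverse32(uint32_t V) {
      V = ((V >> 1) & 0x55555555u) | ((V & 0x55555555u) << 1);  // odd/even bits
      V = ((V >> 2) & 0x33333333u) | ((V & 0x33333333u) << 2);  // bit pairs
      V = ((V >> 4) & 0x0F0F0F0Fu) | ((V & 0x0F0F0F0Fu) << 4);  // nibbles
      V = ((V >> 8) & 0x00FF00FFu) | ((V & 0x00FF00FFu) << 8);  // bytes
      return (V >> 16) | (V << 16);                             // halves
    }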
@@ -7638,7 +7713,7 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
while (MadeBitReverse) {
MadeBitReverse = false;
for (auto &I : reverse(BB)) {
- if (makeBitReverse(I, *DL, *TLI)) {
+ if (makeBitReverse(I)) {
MadeBitReverse = MadeChange = true;
break;
}
@@ -7757,9 +7832,10 @@ bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) {
// %cond2 = icmp|fcmp|binary instruction ...
// %cond.or = or|and i1 %cond1, cond2
// br i1 %cond.or label %dest1, label %dest2"
- BinaryOperator *LogicOp;
+ Instruction *LogicOp;
BasicBlock *TBB, *FBB;
- if (!match(BB.getTerminator(), m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB)))
+ if (!match(BB.getTerminator(),
+ m_Br(m_OneUse(m_Instruction(LogicOp)), TBB, FBB)))
continue;
auto *Br1 = cast<BranchInst>(BB.getTerminator());
@@ -7772,17 +7848,22 @@ bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) {
unsigned Opc;
Value *Cond1, *Cond2;
- if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)),
- m_OneUse(m_Value(Cond2)))))
+ if (match(LogicOp,
+ m_LogicalAnd(m_OneUse(m_Value(Cond1)), m_OneUse(m_Value(Cond2)))))
Opc = Instruction::And;
- else if (match(LogicOp, m_Or(m_OneUse(m_Value(Cond1)),
- m_OneUse(m_Value(Cond2)))))
+ else if (match(LogicOp, m_LogicalOr(m_OneUse(m_Value(Cond1)),
+ m_OneUse(m_Value(Cond2)))))
Opc = Instruction::Or;
else
continue;
- if (!match(Cond1, m_CombineOr(m_Cmp(), m_BinOp())) ||
- !match(Cond2, m_CombineOr(m_Cmp(), m_BinOp())) )
+ auto IsGoodCond = [](Value *Cond) {
+ return match(
+ Cond,
+ m_CombineOr(m_Cmp(), m_CombineOr(m_LogicalAnd(m_Value(), m_Value()),
+ m_LogicalOr(m_Value(), m_Value()))));
+ };
+ if (!IsGoodCond(Cond1) || !IsGoodCond(Cond2))
continue;
LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
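Switching from m_BinOp to m_LogicalAnd/m_LogicalOr also catches the select form of short-circuit logic (select i1 %a, i1 %b, false and select i1 %a, true, i1 %b), not just the bitwise and/or instructions. At the source level the split turns one branch on a combined condition into two chained branches; a picture for the or case (the pass itself works on IR branches, not C):

    // Before splitting: one branch on a combined condition.
    if (C1 || C2) { takenPath(); } else { fallthroughPath(); }

    // After splitting: equivalent control flow with two branches,
    // each of which the target can predict independently.
    if (C1)      { takenPath(); }
    else if (C2) { takenPath(); }
    else         { fallthroughPath(); }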
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
index 12dadf97e02c..97c110afdda4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
@@ -58,6 +58,7 @@ CGOPT(bool, EnableNoInfsFPMath)
CGOPT(bool, EnableNoNaNsFPMath)
CGOPT(bool, EnableNoSignedZerosFPMath)
CGOPT(bool, EnableNoTrappingFPMath)
+CGOPT(bool, EnableAIXExtendedAltivecABI)
CGOPT(DenormalMode::DenormalModeKind, DenormalFPMath)
CGOPT(DenormalMode::DenormalModeKind, DenormalFP32Math)
CGOPT(bool, EnableHonorSignDependentRoundingFPMath)
@@ -74,7 +75,12 @@ CGOPT(bool, UseCtors)
CGOPT(bool, RelaxELFRelocations)
CGOPT_EXP(bool, DataSections)
CGOPT_EXP(bool, FunctionSections)
+CGOPT(bool, IgnoreXCOFFVisibility)
+CGOPT(bool, XCOFFTracebackTable)
CGOPT(std::string, BBSections)
+CGOPT(std::string, StackProtectorGuard)
+CGOPT(unsigned, StackProtectorGuardOffset)
+CGOPT(std::string, StackProtectorGuardReg)
CGOPT(unsigned, TLSSize)
CGOPT(bool, EmulatedTLS)
CGOPT(bool, UniqueSectionNames)
@@ -84,7 +90,10 @@ CGOPT(DebuggerKind, DebuggerTuningOpt)
CGOPT(bool, EnableStackSizeSection)
CGOPT(bool, EnableAddrsig)
CGOPT(bool, EmitCallSiteInfo)
+CGOPT(bool, EnableMachineFunctionSplitter)
CGOPT(bool, EnableDebugEntryValues)
+CGOPT(bool, PseudoProbeForProfiling)
+CGOPT(bool, ValueTrackingVariableLocations)
CGOPT(bool, ForceDwarfFrameSection)
CGOPT(bool, XRayOmitFunctionIndex)
@@ -276,6 +285,11 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
cl::init(false));
CGBINDOPT(DontPlaceZerosInBSS);
+ static cl::opt<bool> EnableAIXExtendedAltivecABI(
+ "vec-extabi", cl::desc("Enable the AIX Extended Altivec ABI."),
+ cl::init(false));
+ CGBINDOPT(EnableAIXExtendedAltivecABI);
+
static cl::opt<bool> EnableGuaranteedTailCallOpt(
"tailcallopt",
cl::desc(
@@ -331,13 +345,40 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
cl::init(false));
CGBINDOPT(FunctionSections);
+ static cl::opt<bool> IgnoreXCOFFVisibility(
+ "ignore-xcoff-visibility",
+ cl::desc("Not emit the visibility attribute for asm in AIX OS or give "
+ "all symbols 'unspecified' visibility in XCOFF object file"),
+ cl::init(false));
+ CGBINDOPT(IgnoreXCOFFVisibility);
+
+ static cl::opt<bool> XCOFFTracebackTable(
+ "xcoff-traceback-table", cl::desc("Emit the XCOFF traceback table"),
+ cl::init(true));
+ CGBINDOPT(XCOFFTracebackTable);
+
static cl::opt<std::string> BBSections(
- "basicblock-sections",
+ "basic-block-sections",
cl::desc("Emit basic blocks into separate sections"),
cl::value_desc("all | <function list (file)> | labels | none"),
cl::init("none"));
CGBINDOPT(BBSections);
+ static cl::opt<std::string> StackProtectorGuard(
+ "stack-protector-guard", cl::desc("Stack protector guard mode"),
+ cl::init("none"));
+ CGBINDOPT(StackProtectorGuard);
+
+ static cl::opt<std::string> StackProtectorGuardReg(
+ "stack-protector-guard-reg", cl::desc("Stack protector guard register"),
+ cl::init("none"));
+ CGBINDOPT(StackProtectorGuardReg);
+
+ static cl::opt<unsigned> StackProtectorGuardOffset(
+ "stack-protector-guard-offset", cl::desc("Stack protector guard offset"),
+ cl::init((unsigned)-1));
+ CGBINDOPT(StackProtectorGuardOffset);
+
static cl::opt<unsigned> TLSSize(
"tls-size", cl::desc("Bit size of immediate TLS offsets"), cl::init(0));
CGBINDOPT(TLSSize);
@@ -352,7 +393,7 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
CGBINDOPT(UniqueSectionNames);
static cl::opt<bool> UniqueBasicBlockSectionNames(
- "unique-bb-section-names",
+ "unique-basic-block-section-names",
cl::desc("Give unique names to every basic block section"),
cl::init(false));
CGBINDOPT(UniqueBasicBlockSectionNames);
@@ -400,6 +441,24 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
cl::init(false));
CGBINDOPT(EnableDebugEntryValues);
+ static cl::opt<bool> PseudoProbeForProfiling(
+ "pseudo-probe-for-profiling", cl::desc("Emit pseudo probes for AutoFDO"),
+ cl::init(false));
+ CGBINDOPT(PseudoProbeForProfiling);
+
+ static cl::opt<bool> ValueTrackingVariableLocations(
+ "experimental-debug-variable-locations",
+ cl::desc("Use experimental new value-tracking variable locations"),
+ cl::init(false));
+ CGBINDOPT(ValueTrackingVariableLocations);
+
+ static cl::opt<bool> EnableMachineFunctionSplitter(
+ "split-machine-functions",
+ cl::desc("Split out cold basic blocks from machine functions based on "
+ "profile information"),
+ cl::init(false));
+ CGBINDOPT(EnableMachineFunctionSplitter);
+
static cl::opt<bool> ForceDwarfFrameSection(
"force-dwarf-frame-section",
cl::desc("Always emit a debug frame section."), cl::init(false));
@@ -436,9 +495,28 @@ codegen::getBBSectionsMode(llvm::TargetOptions &Options) {
}
}
+llvm::StackProtectorGuards
+codegen::getStackProtectorGuardMode(llvm::TargetOptions &Options) {
+ if (getStackProtectorGuard() == "tls")
+ return StackProtectorGuards::TLS;
+ if (getStackProtectorGuard() == "global")
+ return StackProtectorGuards::Global;
+ if (getStackProtectorGuard() != "none") {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr =
+ MemoryBuffer::getFile(getStackProtectorGuard());
+ if (!MBOrErr)
+ errs() << "error illegal stack protector guard mode: "
+ << MBOrErr.getError().message() << "\n";
+ else
+ Options.BBSectionsFuncListBuf = std::move(*MBOrErr);
+ }
+ return StackProtectorGuards::None;
+}
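These three options mirror what kernels use for per-CPU stack canaries. A plausible invocation, assuming an llc built from this revision and an x86-64 target that consumes the values (the register and offset shown are the conventional x86-64 TLS canary slot at %fs:0x28):

    llc -stack-protector-guard=tls \
        -stack-protector-guard-reg=fs \
        -stack-protector-guard-offset=40 input.ll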
+
// Common utility function tightly tied to the options listed here. Initializes
// a TargetOptions object with CodeGen flags and returns it.
-TargetOptions codegen::InitTargetOptionsFromCodeGenFlags() {
+TargetOptions
+codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
TargetOptions Options;
Options.AllowFPOpFusion = getFuseFPOps();
Options.UnsafeFPMath = getEnableUnsafeFPMath();
@@ -456,25 +534,35 @@ TargetOptions codegen::InitTargetOptionsFromCodeGenFlags() {
getEnableHonorSignDependentRoundingFPMath();
if (getFloatABIForCalls() != FloatABI::Default)
Options.FloatABIType = getFloatABIForCalls();
+ Options.EnableAIXExtendedAltivecABI = getEnableAIXExtendedAltivecABI();
Options.NoZerosInBSS = getDontPlaceZerosInBSS();
Options.GuaranteedTailCallOpt = getEnableGuaranteedTailCallOpt();
Options.StackAlignmentOverride = getOverrideStackAlignment();
Options.StackSymbolOrdering = getStackSymbolOrdering();
Options.UseInitArray = !getUseCtors();
Options.RelaxELFRelocations = getRelaxELFRelocations();
- Options.DataSections = getDataSections();
+ Options.DataSections =
+ getExplicitDataSections().getValueOr(TheTriple.hasDefaultDataSections());
Options.FunctionSections = getFunctionSections();
+ Options.IgnoreXCOFFVisibility = getIgnoreXCOFFVisibility();
+ Options.XCOFFTracebackTable = getXCOFFTracebackTable();
Options.BBSections = getBBSectionsMode(Options);
Options.UniqueSectionNames = getUniqueSectionNames();
Options.UniqueBasicBlockSectionNames = getUniqueBasicBlockSectionNames();
+ Options.StackProtectorGuard = getStackProtectorGuardMode(Options);
+ Options.StackProtectorGuardOffset = getStackProtectorGuardOffset();
+ Options.StackProtectorGuardReg = getStackProtectorGuardReg();
Options.TLSSize = getTLSSize();
Options.EmulatedTLS = getEmulatedTLS();
Options.ExplicitEmulatedTLS = EmulatedTLSView->getNumOccurrences() > 0;
Options.ExceptionModel = getExceptionModel();
Options.EmitStackSizeSection = getEnableStackSizeSection();
+ Options.EnableMachineFunctionSplitter = getEnableMachineFunctionSplitter();
Options.EmitAddrsig = getEnableAddrsig();
Options.EmitCallSiteInfo = getEmitCallSiteInfo();
Options.EnableDebugEntryValues = getEnableDebugEntryValues();
+ Options.PseudoProbeForProfiling = getPseudoProbeForProfiling();
+ Options.ValueTrackingVariableLocations = getValueTrackingVariableLocations();
Options.ForceDwarfFrameSection = getForceDwarfFrameSection();
Options.XRayOmitFunctionIndex = getXRayOmitFunctionIndex();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index d1529b08f708..93467e9d09b8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -48,6 +49,8 @@ namespace {
private:
bool isDead(const MachineInstr *MI) const;
+
+ bool eliminateDeadMI(MachineFunction &MF);
};
}
char DeadMachineInstructionElim::ID = 0;
@@ -107,7 +110,13 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
+ bool AnyChanges = eliminateDeadMI(MF);
+ while (AnyChanges && eliminateDeadMI(MF))
+ ;
+ return AnyChanges;
+}
+bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) {
bool AnyChanges = false;
MRI = &MF.getRegInfo();
TRI = MF.getSubtarget().getRegisterInfo();
@@ -116,22 +125,24 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
// Loop over all instructions in all blocks, from bottom to top, so that it's
// more likely that chains of dependent but ultimately dead instructions will
// be cleaned up.
- for (MachineBasicBlock &MBB : make_range(MF.rbegin(), MF.rend())) {
+ for (MachineBasicBlock *MBB : post_order(&MF)) {
// Start out assuming that reserved registers are live out of this block.
LivePhysRegs = MRI->getReservedRegs();
// Add live-ins from successors to LivePhysRegs. Normally, physregs are not
// live across blocks, but some targets (x86) can have flags live out of a
// block.
- for (MachineBasicBlock::succ_iterator S = MBB.succ_begin(),
- E = MBB.succ_end(); S != E; S++)
+ for (MachineBasicBlock::succ_iterator S = MBB->succ_begin(),
+ E = MBB->succ_end();
+ S != E; S++)
for (const auto &LI : (*S)->liveins())
LivePhysRegs.set(LI.PhysReg);
// Now scan the instructions and delete dead ones, tracking physreg
// liveness as we go.
- for (MachineBasicBlock::reverse_iterator MII = MBB.rbegin(),
- MIE = MBB.rend(); MII != MIE; ) {
+ for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(),
+ MIE = MBB->rend();
+ MII != MIE;) {
MachineInstr *MI = &*MII++;
// If the instruction is dead, delete it!
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DetectDeadLanes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DetectDeadLanes.cpp
index 6d5306c1dc0c..03fe5f155291 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/DetectDeadLanes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DetectDeadLanes.cpp
@@ -25,11 +25,7 @@
//
//===----------------------------------------------------------------------===//
-#include <deque>
-#include <vector>
-
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
@@ -40,6 +36,7 @@
#include "llvm/PassRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include <deque>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp
index c75c957bff8a..97e0162f35a1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DwarfEHPrepare.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
@@ -44,67 +45,44 @@ STATISTIC(NumResumesLowered, "Number of resume calls lowered");
namespace {
- class DwarfEHPrepare : public FunctionPass {
- // RewindFunction - _Unwind_Resume or the target equivalent.
- FunctionCallee RewindFunction = nullptr;
+class DwarfEHPrepare {
+ CodeGenOpt::Level OptLevel;
- CodeGenOpt::Level OptLevel;
- DominatorTree *DT = nullptr;
- const TargetLowering *TLI = nullptr;
-
- bool InsertUnwindResumeCalls(Function &Fn);
- Value *GetExceptionObject(ResumeInst *RI);
- size_t
- pruneUnreachableResumes(Function &Fn,
- SmallVectorImpl<ResumeInst *> &Resumes,
- SmallVectorImpl<LandingPadInst *> &CleanupLPads);
+ // RewindFunction - _Unwind_Resume or the target equivalent.
+ FunctionCallee &RewindFunction;
- public:
- static char ID; // Pass identification, replacement for typeid.
-
- DwarfEHPrepare(CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
- : FunctionPass(ID), OptLevel(OptLevel) {}
+ Function &F;
+ const TargetLowering &TLI;
+ DomTreeUpdater *DTU;
+ const TargetTransformInfo *TTI;
- bool runOnFunction(Function &Fn) override;
+ /// Return the exception object from the value passed into
+ /// the 'resume' instruction (typically an aggregate). Clean up any dead
+ /// instructions, including the 'resume' instruction.
+ Value *GetExceptionObject(ResumeInst *RI);
- bool doFinalization(Module &M) override {
- RewindFunction = nullptr;
- return false;
- }
+ /// Replace resumes that are not reachable from a cleanup landing pad with
+ /// unreachable and then simplify those blocks.
+ size_t
+ pruneUnreachableResumes(SmallVectorImpl<ResumeInst *> &Resumes,
+ SmallVectorImpl<LandingPadInst *> &CleanupLPads);
- void getAnalysisUsage(AnalysisUsage &AU) const override;
+ /// Convert the ResumeInsts that are still present
+ /// into calls to the appropriate _Unwind_Resume function.
+ bool InsertUnwindResumeCalls();
- StringRef getPassName() const override {
- return "Exception handling preparation";
- }
- };
+public:
+ DwarfEHPrepare(CodeGenOpt::Level OptLevel_, FunctionCallee &RewindFunction_,
+ Function &F_, const TargetLowering &TLI_, DomTreeUpdater *DTU_,
+ const TargetTransformInfo *TTI_)
+ : OptLevel(OptLevel_), RewindFunction(RewindFunction_), F(F_), TLI(TLI_),
+ DTU(DTU_), TTI(TTI_) {}
-} // end anonymous namespace
+ bool run();
+};
-char DwarfEHPrepare::ID = 0;
+} // namespace
-INITIALIZE_PASS_BEGIN(DwarfEHPrepare, DEBUG_TYPE,
- "Prepare DWARF exceptions", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_END(DwarfEHPrepare, DEBUG_TYPE,
- "Prepare DWARF exceptions", false, false)
-
-FunctionPass *llvm::createDwarfEHPass(CodeGenOpt::Level OptLevel) {
- return new DwarfEHPrepare(OptLevel);
-}
-
-void DwarfEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetPassConfig>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- if (OptLevel != CodeGenOpt::None)
- AU.addRequired<DominatorTreeWrapperPass>();
-}
-
-/// GetExceptionObject - Return the exception object from the value passed into
-/// the 'resume' instruction (typically an aggregate). Clean up any dead
-/// instructions, including the 'resume' instruction.
Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) {
Value *V = RI->getOperand(0);
Value *ExnObj = nullptr;
@@ -142,16 +120,16 @@ Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) {
return ExnObj;
}
-/// Replace resumes that are not reachable from a cleanup landing pad with
-/// unreachable and then simplify those blocks.
size_t DwarfEHPrepare::pruneUnreachableResumes(
- Function &Fn, SmallVectorImpl<ResumeInst *> &Resumes,
+ SmallVectorImpl<ResumeInst *> &Resumes,
SmallVectorImpl<LandingPadInst *> &CleanupLPads) {
+ assert(DTU && "Should have DomTreeUpdater here.");
+
BitVector ResumeReachable(Resumes.size());
size_t ResumeIndex = 0;
for (auto *RI : Resumes) {
for (auto *LP : CleanupLPads) {
- if (isPotentiallyReachable(LP, RI, nullptr, DT)) {
+ if (isPotentiallyReachable(LP, RI, nullptr, &DTU->getDomTree())) {
ResumeReachable.set(ResumeIndex);
break;
}
@@ -163,9 +141,7 @@ size_t DwarfEHPrepare::pruneUnreachableResumes(
if (ResumeReachable.all())
return Resumes.size();
- const TargetTransformInfo &TTI =
- getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn);
- LLVMContext &Ctx = Fn.getContext();
+ LLVMContext &Ctx = F.getContext();
// Otherwise, insert unreachable instructions and call simplifycfg.
size_t ResumesLeft = 0;
@@ -177,19 +153,17 @@ size_t DwarfEHPrepare::pruneUnreachableResumes(
BasicBlock *BB = RI->getParent();
new UnreachableInst(Ctx, RI);
RI->eraseFromParent();
- simplifyCFG(BB, TTI);
+ simplifyCFG(BB, *TTI, RequireAndPreserveDomTree ? DTU : nullptr);
}
}
Resumes.resize(ResumesLeft);
return ResumesLeft;
}
-/// InsertUnwindResumeCalls - Convert the ResumeInsts that are still present
-/// into calls to the appropriate _Unwind_Resume function.
-bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
- SmallVector<ResumeInst*, 16> Resumes;
- SmallVector<LandingPadInst*, 16> CleanupLPads;
- for (BasicBlock &BB : Fn) {
+bool DwarfEHPrepare::InsertUnwindResumeCalls() {
+ SmallVector<ResumeInst *, 16> Resumes;
+ SmallVector<LandingPadInst *, 16> CleanupLPads;
+ for (BasicBlock &BB : F) {
if (auto *RI = dyn_cast<ResumeInst>(BB.getTerminator()))
Resumes.push_back(RI);
if (auto *LP = BB.getLandingPadInst())
@@ -201,25 +175,25 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
return false;
// Check the personality, don't do anything if it's scope-based.
- EHPersonality Pers = classifyEHPersonality(Fn.getPersonalityFn());
+ EHPersonality Pers = classifyEHPersonality(F.getPersonalityFn());
if (isScopedEHPersonality(Pers))
return false;
- LLVMContext &Ctx = Fn.getContext();
+ LLVMContext &Ctx = F.getContext();
size_t ResumesLeft = Resumes.size();
if (OptLevel != CodeGenOpt::None)
- ResumesLeft = pruneUnreachableResumes(Fn, Resumes, CleanupLPads);
+ ResumesLeft = pruneUnreachableResumes(Resumes, CleanupLPads);
if (ResumesLeft == 0)
return true; // We pruned them all.
// Find the rewind function if we didn't already.
if (!RewindFunction) {
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
- Type::getInt8PtrTy(Ctx), false);
- const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME);
- RewindFunction = Fn.getParent()->getOrInsertFunction(RewindName, FTy);
+ FunctionType *FTy =
+ FunctionType::get(Type::getVoidTy(Ctx), Type::getInt8PtrTy(Ctx), false);
+ const char *RewindName = TLI.getLibcallName(RTLIB::UNWIND_RESUME);
+ RewindFunction = F.getParent()->getOrInsertFunction(RewindName, FTy);
}
// Create the basic block where the _Unwind_Resume call will live.
@@ -232,22 +206,27 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
// Call the _Unwind_Resume function.
CallInst *CI = CallInst::Create(RewindFunction, ExnObj, "", UnwindBB);
- CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME));
+ CI->setCallingConv(TLI.getLibcallCallingConv(RTLIB::UNWIND_RESUME));
// We never expect _Unwind_Resume to return.
+ CI->setDoesNotReturn();
new UnreachableInst(Ctx, UnwindBB);
return true;
}
- BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &Fn);
- PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesLeft,
- "exn.obj", UnwindBB);
+ std::vector<DominatorTree::UpdateType> Updates;
+ Updates.reserve(Resumes.size());
+
+ BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &F);
+ PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesLeft, "exn.obj",
+ UnwindBB);
// Extract the exception object from the ResumeInst and add it to the PHI node
// that feeds the _Unwind_Resume call.
for (ResumeInst *RI : Resumes) {
BasicBlock *Parent = RI->getParent();
BranchInst::Create(UnwindBB, Parent);
+ Updates.push_back({DominatorTree::Insert, Parent, UnwindBB});
Value *ExnObj = GetExceptionObject(RI);
PN->addIncoming(ExnObj, Parent);
@@ -257,21 +236,100 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
// Call the function.
CallInst *CI = CallInst::Create(RewindFunction, PN, "", UnwindBB);
- CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME));
+ CI->setCallingConv(TLI.getLibcallCallingConv(RTLIB::UNWIND_RESUME));
// We never expect _Unwind_Resume to return.
+ CI->setDoesNotReturn();
new UnreachableInst(Ctx, UnwindBB);
+
+ if (DTU && RequireAndPreserveDomTree)
+ DTU->applyUpdates(Updates);
+
return true;
}
-bool DwarfEHPrepare::runOnFunction(Function &Fn) {
- const TargetMachine &TM =
- getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
- DT = OptLevel != CodeGenOpt::None
- ? &getAnalysis<DominatorTreeWrapperPass>().getDomTree() : nullptr;
- TLI = TM.getSubtargetImpl(Fn)->getTargetLowering();
- bool Changed = InsertUnwindResumeCalls(Fn);
- DT = nullptr;
- TLI = nullptr;
+bool DwarfEHPrepare::run() {
+ assert(((OptLevel == CodeGenOpt::None || !RequireAndPreserveDomTree) ||
+ (DTU &&
+ DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full))) &&
+ "Original domtree is invalid?");
+
+ bool Changed = InsertUnwindResumeCalls();
+
+ assert(((OptLevel == CodeGenOpt::None || !RequireAndPreserveDomTree) ||
+ (DTU &&
+ DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full))) &&
+ "Original domtree is invalid?");
+
return Changed;
}
+
+static bool prepareDwarfEH(CodeGenOpt::Level OptLevel,
+ FunctionCallee &RewindFunction, Function &F,
+ const TargetLowering &TLI, DominatorTree *DT,
+ const TargetTransformInfo *TTI) {
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+
+ return DwarfEHPrepare(OptLevel, RewindFunction, F, TLI, DT ? &DTU : nullptr,
+ TTI)
+ .run();
+}
+
+namespace {
+
+class DwarfEHPrepareLegacyPass : public FunctionPass {
+ // RewindFunction - _Unwind_Resume or the target equivalent.
+ FunctionCallee RewindFunction = nullptr;
+
+ CodeGenOpt::Level OptLevel;
+
+public:
+ static char ID; // Pass identification, replacement for typeid.
+
+ DwarfEHPrepareLegacyPass(CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
+ : FunctionPass(ID), OptLevel(OptLevel) {}
+
+ bool runOnFunction(Function &F) override {
+ const TargetMachine &TM =
+ getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
+ const TargetLowering &TLI = *TM.getSubtargetImpl(F)->getTargetLowering();
+ DominatorTree *DT = nullptr;
+ const TargetTransformInfo *TTI = nullptr;
+ if (OptLevel != CodeGenOpt::None) {
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ }
+ return prepareDwarfEH(OptLevel, RewindFunction, F, TLI, DT, TTI);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetPassConfig>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ if (OptLevel != CodeGenOpt::None) {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ if (RequireAndPreserveDomTree)
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ }
+ }
+
+ StringRef getPassName() const override {
+ return "Exception handling preparation";
+ }
+};
+
+} // end anonymous namespace
+
+char DwarfEHPrepareLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(DwarfEHPrepareLegacyPass, DEBUG_TYPE,
+ "Prepare DWARF exceptions", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(DwarfEHPrepareLegacyPass, DEBUG_TYPE,
+ "Prepare DWARF exceptions", false, false)
+
+FunctionPass *llvm::createDwarfEHPass(CodeGenOpt::Level OptLevel) {
+ return new DwarfEHPrepareLegacyPass(OptLevel);
+}
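
The rewritten pass above funnels every surviving resume into one shared unwind_resume block while recording each new CFG edge for a batched dominator-tree update. A minimal standalone sketch of that DomTreeUpdater batching pattern, assuming already-collected predecessor blocks (all names here are hypothetical, not part of the pass):

  // Sketch only: batch CFG updates through a DomTreeUpdater, as the
  // rewritten DwarfEHPrepare does. Blocks and names are hypothetical.
  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/Analysis/DomTreeUpdater.h"
  #include "llvm/IR/Dominators.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  static void linkResumesToUnwindBlock(ArrayRef<BasicBlock *> ResumeBlocks,
                                       BasicBlock *UnwindBB,
                                       DomTreeUpdater *DTU) {
    std::vector<DominatorTree::UpdateType> Updates;
    Updates.reserve(ResumeBlocks.size());
    for (BasicBlock *Pred : ResumeBlocks) {
      BranchInst::Create(UnwindBB, Pred);  // new edge Pred -> UnwindBB
      Updates.push_back({DominatorTree::Insert, Pred, UnwindBB});
    }
    if (DTU)
      DTU->applyUpdates(Updates);  // one batched domtree update at the end
  }
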
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
index 96d4efb856c1..cf7d93d6a33a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/Passes.h"
@@ -264,7 +265,8 @@ bool SSAIfConv::InstrDependenciesAllowIfConv(MachineInstr *I) {
// Remember clobbered regunits.
if (MO.isDef() && Register::isPhysicalRegister(Reg))
- for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
+ ++Units)
ClobberedRegUnits.set(*Units);
if (!MO.readsReg() || !Register::isVirtualRegister(Reg))
@@ -363,7 +365,7 @@ bool SSAIfConv::findInsertionPoint() {
// Keep track of live regunits before the current position.
// Only track RegUnits that are also in ClobberedRegUnits.
LiveRegUnits.clear();
- SmallVector<unsigned, 8> Reads;
+ SmallVector<MCRegister, 8> Reads;
MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();
MachineBasicBlock::iterator I = Head->end();
MachineBasicBlock::iterator B = Head->begin();
@@ -385,11 +387,12 @@ bool SSAIfConv::findInsertionPoint() {
continue;
// I clobbers Reg, so it isn't live before I.
if (MO.isDef())
- for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
+ ++Units)
LiveRegUnits.erase(*Units);
// Unless I reads Reg.
if (MO.readsReg())
- Reads.push_back(Reg);
+ Reads.push_back(Reg.asMCReg());
}
// Anything read by I is live before I.
while (!Reads.empty())
@@ -794,6 +797,17 @@ static unsigned adjCycles(unsigned Cyc, int Delta) {
return Cyc + Delta;
}
+namespace {
+/// Helper class to simplify emission of cycle counts into optimization remarks.
+struct Cycles {
+ const char *Key;
+ unsigned Value;
+};
+template <typename Remark> Remark &operator<<(Remark &R, Cycles C) {
+ return R << ore::NV(C.Key, C.Value) << (C.Value == 1 ? " cycle" : " cycles");
+}
+} // anonymous namespace
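
The Cycles wrapper above lets each remark stream a count plus a correctly pluralized unit in one expression. The same pattern reduced to a self-contained sketch over std::ostream (names hypothetical, and the remark-argument key is unused in this reduction):

  #include <iostream>

  struct Cycles {
    const char *Key; // remark argument name; unused in this reduced sketch
    unsigned Value;
  };

  static std::ostream &operator<<(std::ostream &OS, Cycles C) {
    return OS << C.Value << (C.Value == 1 ? " cycle" : " cycles");
  }

  int main() {
    std::cout << "critical path grows by " << Cycles{"ResLength", 1} << "\n"
              << "threshold is " << Cycles{"CritLimit", 12} << "\n";
  }
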
+
/// Apply cost model and heuristics to the if-conversion in IfConv.
/// Return true if the conversion is a good idea.
///
@@ -814,6 +828,9 @@ bool EarlyIfConverter::shouldConvertIf() {
// Set a somewhat arbitrary limit on the critical path extension we accept.
unsigned CritLimit = SchedModel.MispredictPenalty/2;
+ MachineBasicBlock &MBB = *IfConv.Head;
+ MachineOptimizationRemarkEmitter MORE(*MBB.getParent(), nullptr);
+
// If-conversion only makes sense when there is unexploited ILP. Compute the
// maximum-ILP resource length of the trace after if-conversion. Compare it
// to the shortest critical path.
@@ -825,6 +842,17 @@ bool EarlyIfConverter::shouldConvertIf() {
<< ", minimal critical path " << MinCrit << '\n');
if (ResLength > MinCrit + CritLimit) {
LLVM_DEBUG(dbgs() << "Not enough available ILP.\n");
+ MORE.emit([&]() {
+ MachineOptimizationRemarkMissed R(DEBUG_TYPE, "IfConversion",
+ MBB.findDebugLoc(MBB.back()), &MBB);
+ R << "did not if-convert branch: the resulting critical path ("
+ << Cycles{"ResLength", ResLength}
+ << ") would extend the shorter leg's critical path ("
+ << Cycles{"MinCrit", MinCrit} << ") by more than the threshold of "
+ << Cycles{"CritLimit", CritLimit}
+ << ", which cannot be hidden by available ILP.";
+ return R;
+ });
return false;
}
@@ -839,6 +867,14 @@ bool EarlyIfConverter::shouldConvertIf() {
// Look at all the tail phis, and compute the critical path extension caused
// by inserting select instructions.
MachineTraceMetrics::Trace TailTrace = MinInstr->getTrace(IfConv.Tail);
+ struct CriticalPathInfo {
+ unsigned Extra; // Count of extra cycles that the component adds.
+ unsigned Depth; // Absolute depth of the component in cycles.
+ };
+ CriticalPathInfo Cond{};
+ CriticalPathInfo TBlock{};
+ CriticalPathInfo FBlock{};
+ bool ShouldConvert = true;
for (unsigned i = 0, e = IfConv.PHIs.size(); i != e; ++i) {
SSAIfConv::PHIInfo &PI = IfConv.PHIs[i];
unsigned Slack = TailTrace.getInstrSlack(*PI.PHI);
@@ -850,9 +886,11 @@ bool EarlyIfConverter::shouldConvertIf() {
if (CondDepth > MaxDepth) {
unsigned Extra = CondDepth - MaxDepth;
LLVM_DEBUG(dbgs() << "Condition adds " << Extra << " cycles.\n");
+ if (Extra > Cond.Extra)
+ Cond = {Extra, CondDepth};
if (Extra > CritLimit) {
LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
- return false;
+ ShouldConvert = false;
}
}
@@ -861,9 +899,11 @@ bool EarlyIfConverter::shouldConvertIf() {
if (TDepth > MaxDepth) {
unsigned Extra = TDepth - MaxDepth;
LLVM_DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n");
+ if (Extra > TBlock.Extra)
+ TBlock = {Extra, TDepth};
if (Extra > CritLimit) {
LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
- return false;
+ ShouldConvert = false;
}
}
@@ -872,13 +912,63 @@ bool EarlyIfConverter::shouldConvertIf() {
if (FDepth > MaxDepth) {
unsigned Extra = FDepth - MaxDepth;
LLVM_DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n");
+ if (Extra > FBlock.Extra)
+ FBlock = {Extra, FDepth};
if (Extra > CritLimit) {
LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
- return false;
+ ShouldConvert = false;
}
}
}
- return true;
+
+ // Organize by "short" and "long" legs, since the diagnostics get confusing
+ // when referring to the "true" and "false" sides of the branch, given that
+ // those don't always correlate with what the user wrote in source terms.
+ const CriticalPathInfo Short = TBlock.Extra > FBlock.Extra ? FBlock : TBlock;
+ const CriticalPathInfo Long = TBlock.Extra > FBlock.Extra ? TBlock : FBlock;
+
+ if (ShouldConvert) {
+ MORE.emit([&]() {
+ MachineOptimizationRemark R(DEBUG_TYPE, "IfConversion",
+ MBB.back().getDebugLoc(), &MBB);
+ R << "performing if-conversion on branch: the condition adds "
+ << Cycles{"CondCycles", Cond.Extra} << " to the critical path";
+ if (Short.Extra > 0)
+ R << ", and the short leg adds another "
+ << Cycles{"ShortCycles", Short.Extra};
+ if (Long.Extra > 0)
+ R << ", and the long leg adds another "
+ << Cycles{"LongCycles", Long.Extra};
+ R << ", each staying under the threshold of "
+ << Cycles{"CritLimit", CritLimit} << ".";
+ return R;
+ });
+ } else {
+ MORE.emit([&]() {
+ MachineOptimizationRemarkMissed R(DEBUG_TYPE, "IfConversion",
+ MBB.back().getDebugLoc(), &MBB);
+ R << "did not if-convert branch: the condition would add "
+ << Cycles{"CondCycles", Cond.Extra} << " to the critical path";
+ if (Cond.Extra > CritLimit)
+ R << " exceeding the limit of " << Cycles{"CritLimit", CritLimit};
+ if (Short.Extra > 0) {
+ R << ", and the short leg would add another "
+ << Cycles{"ShortCycles", Short.Extra};
+ if (Short.Extra > CritLimit)
+ R << " exceeding the limit of " << Cycles{"CritLimit", CritLimit};
+ }
+ if (Long.Extra > 0) {
+ R << ", and the long leg would add another "
+ << Cycles{"LongCycles", Long.Extra};
+ if (Long.Extra > CritLimit)
+ R << " exceeding the limit of " << Cycles{"CritLimit", CritLimit};
+ }
+ R << ".";
+ return R;
+ });
+ }
+
+ return ShouldConvert;
}
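
A worked reading of the thresholds above, with assumed numbers: if the scheduler reports a MispredictPenalty of 16 cycles, CritLimit is 8. Should the condition add 3 cycles over the PHI's max depth, the short leg 0, and the long leg 6, every component stays within the limit and the success remark is emitted. If the long leg instead added 9 cycles, ShouldConvert would flip to false and the missed-optimization remark would name the component that exceeded the limit.
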
/// Attempt repeated if-conversion on MBB, return true if successful.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp
index 45f21c1085dd..a4c9f02dc64d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
//
// This pass implements IR expansion for reduction intrinsics, allowing targets
-// to enable the experimental intrinsics until just before codegen.
+// to enable the intrinsics until just before codegen.
//
//===----------------------------------------------------------------------===//
@@ -30,49 +30,49 @@ namespace {
unsigned getOpcode(Intrinsic::ID ID) {
switch (ID) {
- case Intrinsic::experimental_vector_reduce_v2_fadd:
+ case Intrinsic::vector_reduce_fadd:
return Instruction::FAdd;
- case Intrinsic::experimental_vector_reduce_v2_fmul:
+ case Intrinsic::vector_reduce_fmul:
return Instruction::FMul;
- case Intrinsic::experimental_vector_reduce_add:
+ case Intrinsic::vector_reduce_add:
return Instruction::Add;
- case Intrinsic::experimental_vector_reduce_mul:
+ case Intrinsic::vector_reduce_mul:
return Instruction::Mul;
- case Intrinsic::experimental_vector_reduce_and:
+ case Intrinsic::vector_reduce_and:
return Instruction::And;
- case Intrinsic::experimental_vector_reduce_or:
+ case Intrinsic::vector_reduce_or:
return Instruction::Or;
- case Intrinsic::experimental_vector_reduce_xor:
+ case Intrinsic::vector_reduce_xor:
return Instruction::Xor;
- case Intrinsic::experimental_vector_reduce_smax:
- case Intrinsic::experimental_vector_reduce_smin:
- case Intrinsic::experimental_vector_reduce_umax:
- case Intrinsic::experimental_vector_reduce_umin:
+ case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_smin:
+ case Intrinsic::vector_reduce_umax:
+ case Intrinsic::vector_reduce_umin:
return Instruction::ICmp;
- case Intrinsic::experimental_vector_reduce_fmax:
- case Intrinsic::experimental_vector_reduce_fmin:
+ case Intrinsic::vector_reduce_fmax:
+ case Intrinsic::vector_reduce_fmin:
return Instruction::FCmp;
default:
llvm_unreachable("Unexpected ID");
}
}
-RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) {
+RecurKind getRK(Intrinsic::ID ID) {
switch (ID) {
- case Intrinsic::experimental_vector_reduce_smax:
- return RecurrenceDescriptor::MRK_SIntMax;
- case Intrinsic::experimental_vector_reduce_smin:
- return RecurrenceDescriptor::MRK_SIntMin;
- case Intrinsic::experimental_vector_reduce_umax:
- return RecurrenceDescriptor::MRK_UIntMax;
- case Intrinsic::experimental_vector_reduce_umin:
- return RecurrenceDescriptor::MRK_UIntMin;
- case Intrinsic::experimental_vector_reduce_fmax:
- return RecurrenceDescriptor::MRK_FloatMax;
- case Intrinsic::experimental_vector_reduce_fmin:
- return RecurrenceDescriptor::MRK_FloatMin;
+ case Intrinsic::vector_reduce_smax:
+ return RecurKind::SMax;
+ case Intrinsic::vector_reduce_smin:
+ return RecurKind::SMin;
+ case Intrinsic::vector_reduce_umax:
+ return RecurKind::UMax;
+ case Intrinsic::vector_reduce_umin:
+ return RecurKind::UMin;
+ case Intrinsic::vector_reduce_fmax:
+ return RecurKind::FMax;
+ case Intrinsic::vector_reduce_fmin:
+ return RecurKind::FMin;
default:
- return RecurrenceDescriptor::MRK_Invalid;
+ return RecurKind::None;
}
}
@@ -83,19 +83,19 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
switch (II->getIntrinsicID()) {
default: break;
- case Intrinsic::experimental_vector_reduce_v2_fadd:
- case Intrinsic::experimental_vector_reduce_v2_fmul:
- case Intrinsic::experimental_vector_reduce_add:
- case Intrinsic::experimental_vector_reduce_mul:
- case Intrinsic::experimental_vector_reduce_and:
- case Intrinsic::experimental_vector_reduce_or:
- case Intrinsic::experimental_vector_reduce_xor:
- case Intrinsic::experimental_vector_reduce_smax:
- case Intrinsic::experimental_vector_reduce_smin:
- case Intrinsic::experimental_vector_reduce_umax:
- case Intrinsic::experimental_vector_reduce_umin:
- case Intrinsic::experimental_vector_reduce_fmax:
- case Intrinsic::experimental_vector_reduce_fmin:
+ case Intrinsic::vector_reduce_fadd:
+ case Intrinsic::vector_reduce_fmul:
+ case Intrinsic::vector_reduce_add:
+ case Intrinsic::vector_reduce_mul:
+ case Intrinsic::vector_reduce_and:
+ case Intrinsic::vector_reduce_or:
+ case Intrinsic::vector_reduce_xor:
+ case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_smin:
+ case Intrinsic::vector_reduce_umax:
+ case Intrinsic::vector_reduce_umin:
+ case Intrinsic::vector_reduce_fmax:
+ case Intrinsic::vector_reduce_fmin:
if (TTI->shouldExpandReduction(II))
Worklist.push_back(II);
@@ -108,7 +108,7 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
FastMathFlags FMF =
isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
Intrinsic::ID ID = II->getIntrinsicID();
- RecurrenceDescriptor::MinMaxRecurrenceKind MRK = getMRK(ID);
+ RecurKind RK = getRK(ID);
Value *Rdx = nullptr;
IRBuilder<> Builder(II);
@@ -116,42 +116,54 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
Builder.setFastMathFlags(FMF);
switch (ID) {
default: llvm_unreachable("Unexpected intrinsic!");
- case Intrinsic::experimental_vector_reduce_v2_fadd:
- case Intrinsic::experimental_vector_reduce_v2_fmul: {
+ case Intrinsic::vector_reduce_fadd:
+ case Intrinsic::vector_reduce_fmul: {
// FMFs must be attached to the call, otherwise it's an ordered reduction
// and it can't be handled by generating a shuffle sequence.
Value *Acc = II->getArgOperand(0);
Value *Vec = II->getArgOperand(1);
if (!FMF.allowReassoc())
- Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK);
+ Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), RK);
else {
if (!isPowerOf2_32(
cast<FixedVectorType>(Vec->getType())->getNumElements()))
continue;
- Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
+ Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK);
Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID),
Acc, Rdx, "bin.rdx");
}
break;
}
- case Intrinsic::experimental_vector_reduce_add:
- case Intrinsic::experimental_vector_reduce_mul:
- case Intrinsic::experimental_vector_reduce_and:
- case Intrinsic::experimental_vector_reduce_or:
- case Intrinsic::experimental_vector_reduce_xor:
- case Intrinsic::experimental_vector_reduce_smax:
- case Intrinsic::experimental_vector_reduce_smin:
- case Intrinsic::experimental_vector_reduce_umax:
- case Intrinsic::experimental_vector_reduce_umin:
- case Intrinsic::experimental_vector_reduce_fmax:
- case Intrinsic::experimental_vector_reduce_fmin: {
+ case Intrinsic::vector_reduce_add:
+ case Intrinsic::vector_reduce_mul:
+ case Intrinsic::vector_reduce_and:
+ case Intrinsic::vector_reduce_or:
+ case Intrinsic::vector_reduce_xor:
+ case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_smin:
+ case Intrinsic::vector_reduce_umax:
+ case Intrinsic::vector_reduce_umin: {
Value *Vec = II->getArgOperand(0);
if (!isPowerOf2_32(
cast<FixedVectorType>(Vec->getType())->getNumElements()))
continue;
- Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
+ Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK);
+ break;
+ }
+ case Intrinsic::vector_reduce_fmax:
+ case Intrinsic::vector_reduce_fmin: {
+ // FIXME: We only expand 'fast' reductions here because the underlying
+ // code in createMinMaxOp() assumes that comparisons use 'fast'
+ // semantics.
+ Value *Vec = II->getArgOperand(0);
+ if (!isPowerOf2_32(
+ cast<FixedVectorType>(Vec->getType())->getNumElements()) ||
+ !FMF.isFast())
+ continue;
+
+ Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK);
break;
}
}
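
getShuffleReduction and getOrderedReduction live in LoopUtils and are only called from here; the shuffle form expands a power-of-two-wide reduction into log2(N) shuffle-plus-binop steps. A rough IRBuilder sketch of that halving idea for an integer add reduction (this is an illustration of the technique, not the library routine, and the helper name is hypothetical):

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/IRBuilder.h"
  #include "llvm/Support/MathExtras.h"
  using namespace llvm;

  static Value *shuffleReduceAdd(IRBuilder<> &B, Value *Vec) {
    auto *VTy = cast<FixedVectorType>(Vec->getType());
    unsigned N = VTy->getNumElements();
    assert(isPowerOf2_32(N) && "caller guarantees a power-of-two width");
    for (unsigned Width = N / 2; Width >= 1; Width /= 2) {
      SmallVector<int, 16> Mask(N, -1);
      for (unsigned i = 0; i != Width; ++i)
        Mask[i] = Width + i;                 // pull the upper half down
      Value *Upper = B.CreateShuffleVector(Vec, UndefValue::get(VTy), Mask);
      Vec = B.CreateAdd(Vec, Upper, "rdx");  // combine halves element-wise
    }
    // Lane 0 now holds the sum of all N original lanes.
    return B.CreateExtractElement(Vec, uint64_t(0), "rdx.result");
  }
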
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
index 27319804049d..f8f99b7e87f2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
@@ -46,6 +46,20 @@ static cl::opt<bool> FixupSCSExtendSlotSize(
cl::desc("Allow spill in spill slot of greater size than register size"),
cl::Hidden);
+static cl::opt<bool> PassGCPtrInCSR(
+ "fixup-allow-gcptr-in-csr", cl::Hidden, cl::init(false),
+ cl::desc("Allow passing GC Pointer arguments in callee saved registers"));
+
+static cl::opt<bool> EnableCopyProp(
+ "fixup-scs-enable-copy-propagation", cl::Hidden, cl::init(true),
+ cl::desc("Enable simple copy propagation during register reloading"));
+
+// This is purely a debugging option.
+// It may be handy for investigating statepoint spilling issues.
+static cl::opt<unsigned> MaxStatepointsWithRegs(
+ "fixup-max-csr-statepoints", cl::Hidden,
+ cl::desc("Max number of statepoints allowed to pass GC Ptrs in registers"));
+
namespace {
class FixupStatepointCallerSaved : public MachineFunctionPass {
@@ -67,6 +81,7 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
};
+
} // End anonymous namespace.
char FixupStatepointCallerSaved::ID = 0;
@@ -83,7 +98,101 @@ static unsigned getRegisterSize(const TargetRegisterInfo &TRI, Register Reg) {
return TRI.getSpillSize(*RC);
}
+// Try to eliminate a redundant copy to the register which we're going to
+// spill, i.e. try to change:
+// X = COPY Y
+// SPILL X
+// to
+// SPILL Y
+// If there are no uses of X between the copy and the STATEPOINT, that COPY
+// may be eliminated.
+// Reg - the register we're about to spill
+// RI - on entry, points to the statepoint;
+//      on successful copy propagation, set to the new spill point
+// IsKill - set to true if the COPY is a kill (there are no uses of Y)
+// Returns either the found copy source register or the original one.
+static Register performCopyPropagation(Register Reg,
+ MachineBasicBlock::iterator &RI,
+ bool &IsKill, const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI) {
+ // First check if statepoint itself uses Reg in non-meta operands.
+ int Idx = RI->findRegisterUseOperandIdx(Reg, false, &TRI);
+ if (Idx >= 0 && (unsigned)Idx < StatepointOpers(&*RI).getNumDeoptArgsIdx()) {
+ IsKill = false;
+ return Reg;
+ }
+
+ if (!EnableCopyProp)
+ return Reg;
+
+ MachineBasicBlock *MBB = RI->getParent();
+ MachineBasicBlock::reverse_iterator E = MBB->rend();
+ MachineInstr *Def = nullptr, *Use = nullptr;
+ for (auto It = ++(RI.getReverse()); It != E; ++It) {
+ if (It->readsRegister(Reg, &TRI) && !Use)
+ Use = &*It;
+ if (It->modifiesRegister(Reg, &TRI)) {
+ Def = &*It;
+ break;
+ }
+ }
+
+ if (!Def)
+ return Reg;
+
+ auto DestSrc = TII.isCopyInstr(*Def);
+ if (!DestSrc || DestSrc->Destination->getReg() != Reg)
+ return Reg;
+
+ Register SrcReg = DestSrc->Source->getReg();
+
+ if (getRegisterSize(TRI, Reg) != getRegisterSize(TRI, SrcReg))
+ return Reg;
+
+ LLVM_DEBUG(dbgs() << "spillRegisters: perform copy propagation "
+ << printReg(Reg, &TRI) << " -> " << printReg(SrcReg, &TRI)
+ << "\n");
+
+ // Insert spill immediately after Def
+ RI = ++MachineBasicBlock::iterator(Def);
+ IsKill = DestSrc->Source->isKill();
+
+ // There are no uses of original register between COPY and STATEPOINT.
+ // There can't be any after STATEPOINT, so we can eliminate Def.
+ if (!Use) {
+ LLVM_DEBUG(dbgs() << "spillRegisters: removing dead copy " << *Def);
+ Def->eraseFromParent();
+ }
+ return SrcReg;
+}
+
namespace {
+// Pair {Register, FrameIndex}
+using RegSlotPair = std::pair<Register, int>;
+
+// Keeps track of what reloads were inserted in MBB.
+class RegReloadCache {
+ using ReloadSet = SmallSet<RegSlotPair, 8>;
+ DenseMap<const MachineBasicBlock *, ReloadSet> Reloads;
+
+public:
+ RegReloadCache() = default;
+
+ // Record reload of Reg from FI in block MBB
+ void recordReload(Register Reg, int FI, const MachineBasicBlock *MBB) {
+ RegSlotPair RSP(Reg, FI);
+ auto Res = Reloads[MBB].insert(RSP);
+ (void)Res;
+ assert(Res.second && "reload already exists");
+ }
+
+ // Does basic block MBB contain a reload of Reg from FI?
+ bool hasReload(Register Reg, int FI, const MachineBasicBlock *MBB) {
+ RegSlotPair RSP(Reg, FI);
+ return Reloads.count(MBB) && Reloads[MBB].count(RSP);
+ }
+};
+
// Cache used frame indexes during statepoint re-write to re-use them when
// processing the next statepoint instruction.
// Two strategies. One is to preserve the size of spill slot while another one
@@ -105,24 +214,62 @@ private:
// size will be increased.
DenseMap<unsigned, FrameIndexesPerSize> Cache;
+ // Keeps track of slots reserved for the shared landing pad processing.
+ // Initialized from GlobalIndices for the current EHPad.
+ SmallSet<int, 8> ReservedSlots;
+
+ // A landing pad can be the destination of several statepoints. Every
+ // register defined by such statepoints must be spilled to the same stack
+ // slot. This map keeps that information.
+ DenseMap<const MachineBasicBlock *, SmallVector<RegSlotPair, 8>>
+ GlobalIndices;
+
+ FrameIndexesPerSize &getCacheBucket(unsigned Size) {
+ // In FixupSCSExtendSlotSize mode the bucket with 0 index is used
+ // for all sizes.
+ return Cache[FixupSCSExtendSlotSize ? 0 : Size];
+ }
+
public:
FrameIndexesCache(MachineFrameInfo &MFI, const TargetRegisterInfo &TRI)
: MFI(MFI), TRI(TRI) {}
// Reset the current state of used frame indexes. After invocation of
- // this function all frame indexes are available for allocation.
- void reset() {
+ // this function all frame indexes are available for allocation with
+ // the exception of slots reserved for landing pad processing (if any).
+ void reset(const MachineBasicBlock *EHPad) {
for (auto &It : Cache)
It.second.Index = 0;
+
+ ReservedSlots.clear();
+ if (EHPad && GlobalIndices.count(EHPad))
+ for (auto &RSP : GlobalIndices[EHPad])
+ ReservedSlots.insert(RSP.second);
}
+
// Get frame index to spill the register.
- int getFrameIndex(Register Reg) {
+ int getFrameIndex(Register Reg, MachineBasicBlock *EHPad) {
+ // Check if slot for Reg is already reserved at EHPad.
+ auto It = GlobalIndices.find(EHPad);
+ if (It != GlobalIndices.end()) {
+ auto &Vec = It->second;
+ auto Idx = llvm::find_if(
+ Vec, [Reg](RegSlotPair &RSP) { return Reg == RSP.first; });
+ if (Idx != Vec.end()) {
+ int FI = Idx->second;
+ LLVM_DEBUG(dbgs() << "Found global FI " << FI << " for register "
+ << printReg(Reg, &TRI) << " at "
+ << printMBBReference(*EHPad) << "\n");
+ assert(ReservedSlots.count(FI) && "using unreserved slot");
+ return FI;
+ }
+ }
+
unsigned Size = getRegisterSize(TRI, Reg);
- // In FixupSCSExtendSlotSize mode the bucket with 0 index is used
- // for all sizes.
- unsigned Bucket = FixupSCSExtendSlotSize ? 0 : Size;
- FrameIndexesPerSize &Line = Cache[Bucket];
- if (Line.Index < Line.Slots.size()) {
+ FrameIndexesPerSize &Line = getCacheBucket(Size);
+ while (Line.Index < Line.Slots.size()) {
int FI = Line.Slots[Line.Index++];
+ if (ReservedSlots.count(FI))
+ continue;
// If all sizes are kept together we probably need to extend the
// spill slot size.
if (MFI.getObjectSize(FI) < Size) {
@@ -136,15 +283,25 @@ public:
NumSpillSlotsAllocated++;
Line.Slots.push_back(FI);
++Line.Index;
+
+ // Remember assignment {Reg, FI} for EHPad
+ if (EHPad) {
+ GlobalIndices[EHPad].push_back(std::make_pair(Reg, FI));
+ LLVM_DEBUG(dbgs() << "Reserved FI " << FI << " for spilling reg "
+ << printReg(Reg, &TRI) << " at landing pad "
+ << printMBBReference(*EHPad) << "\n");
+ }
+
return FI;
}
+
+ // Sort all registers to spill in descending order. In the
// FixupSCSExtendSlotSize mode it will minimize the total frame size.
// In non-FixupSCSExtendSlotSize mode we can skip this step.
void sortRegisters(SmallVectorImpl<Register> &Regs) {
if (!FixupSCSExtendSlotSize)
return;
- llvm::sort(Regs.begin(), Regs.end(), [&](Register &A, Register &B) {
+ llvm::sort(Regs, [&](Register &A, Register &B) {
return getRegisterSize(TRI, A) > getRegisterSize(TRI, B);
});
}
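
A small example of why the descending sort helps, with hypothetical sizes: if one statepoint spills a 16-byte and an 8-byte register, the shared-bucket cache holds a 16-byte and an 8-byte slot. If the next statepoint's registers were visited as 8 then 16 unsorted, the 16-byte register would land in the cached 8-byte slot and force it to grow, leaving two 16-byte slots (32 bytes). Sorted descending, both statepoints request 16 then 8 and the cached slots are reused as-is (24 bytes).
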
@@ -156,6 +313,8 @@ private:
// statepoint instruction.
MachineInstr &MI;
MachineFunction &MF;
+ // If non-null, the statepoint is an invoke and this points to the landing pad.
+ MachineBasicBlock *EHPad;
const TargetRegisterInfo &TRI;
const TargetInstrInfo &TII;
MachineFrameInfo &MFI;
@@ -163,36 +322,77 @@ private:
const uint32_t *Mask;
// Cache of frame indexes used while processing previous instructions.
FrameIndexesCache &CacheFI;
+ bool AllowGCPtrInCSR;
// Operands with physical registers requiring spilling.
SmallVector<unsigned, 8> OpsToSpill;
// Set of register to spill.
SmallVector<Register, 8> RegsToSpill;
+ // Set of registers to reload after statepoint.
+ SmallVector<Register, 8> RegsToReload;
// Map Register to Frame Slot index.
DenseMap<Register, int> RegToSlotIdx;
public:
StatepointState(MachineInstr &MI, const uint32_t *Mask,
- FrameIndexesCache &CacheFI)
+ FrameIndexesCache &CacheFI, bool AllowGCPtrInCSR)
: MI(MI), MF(*MI.getMF()), TRI(*MF.getSubtarget().getRegisterInfo()),
TII(*MF.getSubtarget().getInstrInfo()), MFI(MF.getFrameInfo()),
- Mask(Mask), CacheFI(CacheFI) {}
+ Mask(Mask), CacheFI(CacheFI), AllowGCPtrInCSR(AllowGCPtrInCSR) {
+
+ // Find statepoint's landing pad, if any.
+ EHPad = nullptr;
+ MachineBasicBlock *MBB = MI.getParent();
+ // An invoke statepoint must be the last one in the block.
+ bool Last = std::none_of(++MI.getIterator(), MBB->end().getInstrIterator(),
+ [](MachineInstr &I) {
+ return I.getOpcode() == TargetOpcode::STATEPOINT;
+ });
+
+ if (!Last)
+ return;
+
+ auto IsEHPad = [](MachineBasicBlock *B) { return B->isEHPad(); };
+
+ assert(llvm::count_if(MBB->successors(), IsEHPad) < 2 && "multiple EHPads");
+
+ auto It = llvm::find_if(MBB->successors(), IsEHPad);
+ if (It != MBB->succ_end())
+ EHPad = *It;
+ }
+
+ MachineBasicBlock *getEHPad() const { return EHPad; }
+
// Return true if register is callee saved.
bool isCalleeSaved(Register Reg) { return (Mask[Reg / 32] >> Reg % 32) & 1; }
+
// Iterates over statepoint meta args to find caller saved registers.
// Also caches the sizes of the found registers.
// Returns true if caller saved registers were found.
bool findRegistersToSpill() {
+ SmallSet<Register, 8> GCRegs;
+ // All GC pointer operands assigned to registers produce a new value.
+ // Since they're tied to their defs, it is enough to collect def registers.
+ for (const auto &Def : MI.defs())
+ GCRegs.insert(Def.getReg());
+
SmallSet<Register, 8> VisitedRegs;
for (unsigned Idx = StatepointOpers(&MI).getVarIdx(),
EndIdx = MI.getNumOperands();
Idx < EndIdx; ++Idx) {
MachineOperand &MO = MI.getOperand(Idx);
- if (!MO.isReg() || MO.isImplicit())
+ // Leave `undef` operands as is, StackMaps will rewrite them
+ // into a constant.
+ if (!MO.isReg() || MO.isImplicit() || MO.isUndef())
continue;
Register Reg = MO.getReg();
assert(Reg.isPhysical() && "Only physical regs are expected");
- if (isCalleeSaved(Reg))
+
+ if (isCalleeSaved(Reg) && (AllowGCPtrInCSR || !is_contained(GCRegs, Reg)))
continue;
+
+ LLVM_DEBUG(dbgs() << "Will spill " << printReg(Reg, &TRI) << " at index "
+ << Idx << "\n");
+
if (VisitedRegs.insert(Reg).second)
RegsToSpill.push_back(Reg);
OpsToSpill.push_back(Idx);
@@ -200,30 +400,109 @@ public:
CacheFI.sortRegisters(RegsToSpill);
return !RegsToSpill.empty();
}
+
// Spill all caller saved registers right before the statepoint instruction.
// Remember the frame index where each register is spilled.
void spillRegisters() {
for (Register Reg : RegsToSpill) {
- int FI = CacheFI.getFrameIndex(Reg);
+ int FI = CacheFI.getFrameIndex(Reg, EHPad);
const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg);
- TII.storeRegToStackSlot(*MI.getParent(), MI, Reg, true /*is_Kill*/, FI,
- RC, &TRI);
+
NumSpilledRegisters++;
RegToSlotIdx[Reg] = FI;
+
+ LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, &TRI) << " to FI " << FI
+ << "\n");
+
+ // Perform trivial copy propagation
+ bool IsKill = true;
+ MachineBasicBlock::iterator InsertBefore(MI);
+ Reg = performCopyPropagation(Reg, InsertBefore, IsKill, TII, TRI);
+
+ LLVM_DEBUG(dbgs() << "Insert spill before " << *InsertBefore);
+ TII.storeRegToStackSlot(*MI.getParent(), InsertBefore, Reg, IsKill, FI,
+ RC, &TRI);
+ }
+ }
+
+ void insertReloadBefore(unsigned Reg, MachineBasicBlock::iterator It,
+ MachineBasicBlock *MBB) {
+ const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg);
+ int FI = RegToSlotIdx[Reg];
+ if (It != MBB->end()) {
+ TII.loadRegFromStackSlot(*MBB, It, Reg, FI, RC, &TRI);
+ return;
}
+
+ // To insert a reload at the end of MBB, insert it before the last
+ // instruction and then swap them.
+ assert(!MBB->empty() && "Empty block");
+ --It;
+ TII.loadRegFromStackSlot(*MBB, It, Reg, FI, RC, &TRI);
+ MachineInstr *Reload = It->getPrevNode();
+ int Dummy = 0;
+ (void)Dummy;
+ assert(TII.isLoadFromStackSlot(*Reload, Dummy) == Reg);
+ assert(Dummy == FI);
+ MBB->remove(Reload);
+ MBB->insertAfter(It, Reload);
}
+
+ // Insert reloads of (relocated) registers spilled in statepoint.
+ void insertReloads(MachineInstr *NewStatepoint, RegReloadCache &RC) {
+ MachineBasicBlock *MBB = NewStatepoint->getParent();
+ auto InsertPoint = std::next(NewStatepoint->getIterator());
+
+ for (auto Reg : RegsToReload) {
+ insertReloadBefore(Reg, InsertPoint, MBB);
+ LLVM_DEBUG(dbgs() << "Reloading " << printReg(Reg, &TRI) << " from FI "
+ << RegToSlotIdx[Reg] << " after statepoint\n");
+
+ if (EHPad && !RC.hasReload(Reg, RegToSlotIdx[Reg], EHPad)) {
+ RC.recordReload(Reg, RegToSlotIdx[Reg], EHPad);
+ auto EHPadInsertPoint = EHPad->SkipPHIsLabelsAndDebug(EHPad->begin());
+ insertReloadBefore(Reg, EHPadInsertPoint, EHPad);
+ LLVM_DEBUG(dbgs() << "...also reload at EHPad "
+ << printMBBReference(*EHPad) << "\n");
+ }
+ }
+ }
+
// Re-write the statepoint machine instruction to replace caller saved operands
// with indirect memory locations (frame indexes).
- void rewriteStatepoint() {
+ MachineInstr *rewriteStatepoint() {
MachineInstr *NewMI =
MF.CreateMachineInstr(TII.get(MI.getOpcode()), MI.getDebugLoc(), true);
MachineInstrBuilder MIB(MF, NewMI);
+ unsigned NumOps = MI.getNumOperands();
+
+ // New indices for the remaining defs.
+ SmallVector<unsigned, 8> NewIndices;
+ unsigned NumDefs = MI.getNumDefs();
+ for (unsigned I = 0; I < NumDefs; ++I) {
+ MachineOperand &DefMO = MI.getOperand(I);
+ assert(DefMO.isReg() && DefMO.isDef() && "Expected Reg Def operand");
+ Register Reg = DefMO.getReg();
+ if (!AllowGCPtrInCSR) {
+ assert(is_contained(RegsToSpill, Reg));
+ RegsToReload.push_back(Reg);
+ } else {
+ if (isCalleeSaved(Reg)) {
+ NewIndices.push_back(NewMI->getNumOperands());
+ MIB.addReg(Reg, RegState::Define);
+ } else {
+ NewIndices.push_back(NumOps);
+ RegsToReload.push_back(Reg);
+ }
+ }
+ }
+
// Add End marker.
OpsToSpill.push_back(MI.getNumOperands());
unsigned CurOpIdx = 0;
- for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
+ for (unsigned I = NumDefs; I < MI.getNumOperands(); ++I) {
MachineOperand &MO = MI.getOperand(I);
if (I == OpsToSpill[CurOpIdx]) {
int FI = RegToSlotIdx[MO.getReg()];
@@ -234,23 +513,38 @@ public:
MIB.addFrameIndex(FI);
MIB.addImm(0);
++CurOpIdx;
- } else
+ } else {
MIB.add(MO);
+ unsigned OldDef;
+ if (AllowGCPtrInCSR && MI.isRegTiedToDefOperand(I, &OldDef)) {
+ assert(OldDef < NumDefs);
+ assert(NewIndices[OldDef] < NumOps);
+ MIB->tieOperands(NewIndices[OldDef], MIB->getNumOperands() - 1);
+ }
+ }
}
assert(CurOpIdx == (OpsToSpill.size() - 1) && "Not all operands processed");
// Add mem operands.
NewMI->setMemRefs(MF, MI.memoperands());
for (auto It : RegToSlotIdx) {
+ Register R = It.first;
int FrameIndex = It.second;
auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
- auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
- getRegisterSize(TRI, It.first),
- MFI.getObjectAlign(FrameIndex));
+ MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad;
+ if (is_contained(RegsToReload, R))
+ Flags |= MachineMemOperand::MOStore;
+ auto *MMO =
+ MF.getMachineMemOperand(PtrInfo, Flags, getRegisterSize(TRI, R),
+ MFI.getObjectAlign(FrameIndex));
NewMI->addMemOperand(MF, MMO);
}
+
// Insert new statepoint and erase old one.
MI.getParent()->insert(MI, NewMI);
+
+ LLVM_DEBUG(dbgs() << "rewritten statepoint to : " << *NewMI << "\n");
MI.eraseFromParent();
+ return NewMI;
}
};
@@ -259,28 +553,33 @@ private:
MachineFunction &MF;
const TargetRegisterInfo &TRI;
FrameIndexesCache CacheFI;
+ RegReloadCache ReloadCache;
public:
StatepointProcessor(MachineFunction &MF)
: MF(MF), TRI(*MF.getSubtarget().getRegisterInfo()),
CacheFI(MF.getFrameInfo(), TRI) {}
- bool process(MachineInstr &MI) {
+ bool process(MachineInstr &MI, bool AllowGCPtrInCSR) {
StatepointOpers SO(&MI);
uint64_t Flags = SO.getFlags();
// Do nothing for LiveIn, it supports all registers.
if (Flags & (uint64_t)StatepointFlags::DeoptLiveIn)
return false;
+ LLVM_DEBUG(dbgs() << "\nMBB " << MI.getParent()->getNumber() << " "
+ << MI.getParent()->getName() << " : process statepoint "
+ << MI);
CallingConv::ID CC = SO.getCallingConv();
const uint32_t *Mask = TRI.getCallPreservedMask(MF, CC);
- CacheFI.reset();
- StatepointState SS(MI, Mask, CacheFI);
+ StatepointState SS(MI, Mask, CacheFI, AllowGCPtrInCSR);
+ CacheFI.reset(SS.getEHPad());
if (!SS.findRegistersToSpill())
return false;
SS.spillRegisters();
- SS.rewriteStatepoint();
+ auto *NewStatepoint = SS.rewriteStatepoint();
+ SS.insertReloads(NewStatepoint, ReloadCache);
return true;
}
};
@@ -305,7 +604,14 @@ bool FixupStatepointCallerSaved::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
StatepointProcessor SPP(MF);
- for (MachineInstr *I : Statepoints)
- Changed |= SPP.process(*I);
+ unsigned NumStatepoints = 0;
+ bool AllowGCPtrInCSR = PassGCPtrInCSR;
+ for (MachineInstr *I : Statepoints) {
+ ++NumStatepoints;
+ if (MaxStatepointsWithRegs.getNumOccurrences() &&
+ NumStatepoints >= MaxStatepointsWithRegs)
+ AllowGCPtrInCSR = false;
+ Changed |= SPP.process(*I, AllowGCPtrInCSR);
+ }
return Changed;
}
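
The options added above compose into a bisection workflow: -fixup-allow-gcptr-in-csr opts statepoints into keeping GC pointers in callee-saved registers, while -fixup-max-csr-statepoints=N (a pure debugging knob, per its comment) caps how many leading statepoints get that treatment, so halving N can isolate a miscompiling statepoint. A hypothetical invocation, assuming a test case in test.ll:

  llc -fixup-allow-gcptr-in-csr -fixup-max-csr-statepoints=8 test.ll
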
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp
index c6730aa6b00d..e2ee0c97f94d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp
@@ -296,7 +296,10 @@ void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
} else {
Register FrameReg; // FIXME: surely GCRoot ought to store the
// register that the offset is from?
- RI->StackOffset = TFI->getFrameIndexReference(MF, RI->Num, FrameReg);
+ auto FrameOffset = TFI->getFrameIndexReference(MF, RI->Num, FrameReg);
+ assert(!FrameOffset.getScalable() &&
+ "Frame offsets with a scalable component are not supported");
+ RI->StackOffset = FrameOffset.getFixed();
++RI;
}
}
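
The change above reflects getFrameIndexReference now returning a StackOffset, which carries a fixed byte component and a scalable one (for scalable-vector frames). A small hedged sketch of consuming that API; the helper name is hypothetical:

  #include "llvm/Support/TypeSize.h"
  using namespace llvm;

  // GC roots only understand plain byte offsets, hence the assert in the
  // code above: reject any offset with a scalable component.
  static int64_t fixedPartOrFail(StackOffset Off) {
    assert(!Off.getScalable() &&
           "frame offsets with a scalable component are not supported here");
    return Off.getFixed();
  }
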
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
index c4d8777615d2..2fa208fbfaaf 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
@@ -59,6 +59,7 @@ bool CSEConfigFull::shouldCSEOpc(unsigned Opc) {
case TargetOpcode::G_UNMERGE_VALUES:
case TargetOpcode::G_TRUNC:
case TargetOpcode::G_PTR_ADD:
+ case TargetOpcode::G_EXTRACT:
return true;
}
return false;
@@ -366,23 +367,30 @@ GISelInstProfileBuilder::addNodeIDFlag(unsigned Flag) const {
return *this;
}
+const GISelInstProfileBuilder &
+GISelInstProfileBuilder::addNodeIDReg(Register Reg) const {
+ LLT Ty = MRI.getType(Reg);
+ if (Ty.isValid())
+ addNodeIDRegType(Ty);
+
+ if (const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(Reg)) {
+ if (const auto *RB = RCOrRB.dyn_cast<const RegisterBank *>())
+ addNodeIDRegType(RB);
+ else if (const auto *RC = RCOrRB.dyn_cast<const TargetRegisterClass *>())
+ addNodeIDRegType(RC);
+ }
+ return *this;
+}
+
const GISelInstProfileBuilder &GISelInstProfileBuilder::addNodeIDMachineOperand(
const MachineOperand &MO) const {
if (MO.isReg()) {
Register Reg = MO.getReg();
if (!MO.isDef())
addNodeIDRegNum(Reg);
- LLT Ty = MRI.getType(Reg);
- if (Ty.isValid())
- addNodeIDRegType(Ty);
-
- if (const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(Reg)) {
- if (const auto *RB = RCOrRB.dyn_cast<const RegisterBank *>())
- addNodeIDRegType(RB);
- else if (const auto *RC = RCOrRB.dyn_cast<const TargetRegisterClass *>())
- addNodeIDRegType(RC);
- }
+ // Profile the register properties.
+ addNodeIDReg(Reg);
assert(!MO.isImplicit() && "Unhandled case");
} else if (MO.isImm())
ID.AddInteger(MO.getImm());
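
The profiling above feeds a FoldingSetNodeID, the hashing scheme GlobalISel's CSE uses to key instructions: two instructions whose opcodes, operands, types, and register properties profile identically produce the same key. A reduced sketch of that mechanism (not the GISel builder itself; the fields chosen here are hypothetical):

  #include "llvm/ADT/FoldingSet.h"
  #include <cstdio>
  using namespace llvm;

  static unsigned profileKey(unsigned Opcode, int64_t Imm, unsigned RegNum) {
    FoldingSetNodeID ID;
    ID.AddInteger(Opcode);  // opcode first, like addNodeIDOpcode
    ID.AddInteger(Imm);     // immediates, like addNodeIDImmediate
    ID.AddInteger(RegNum);  // use-register numbers, like addNodeIDRegNum
    return ID.ComputeHash();
  }

  int main() {
    // Identical profiles produce identical hashes; that is the CSE key.
    printf("%u %u\n", profileKey(42, 7, 3), profileKey(42, 7, 3));
  }
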
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
index 88173dc4d302..2c86f06a602d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -13,6 +13,7 @@
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/IR/DebugInfoMetadata.h"
using namespace llvm;
@@ -41,8 +42,14 @@ CSEMIRBuilder::getDominatingInstrForID(FoldingSetNodeID &ID,
if (MI) {
CSEInfo->countOpcodeHit(MI->getOpcode());
auto CurrPos = getInsertPt();
- if (!dominates(MI, CurrPos))
+ auto MII = MachineBasicBlock::iterator(MI);
+ if (MII == CurrPos) {
+ // Move the insert point ahead of the instruction so any future uses of
+ // this builder will have the def ready.
+ setInsertPt(*CurMBB, std::next(MII));
+ } else if (!dominates(MI, CurrPos)) {
CurMBB->splice(CurrPos, CurMBB, MI);
+ }
return MachineInstrBuilder(getMF(), MI);
}
return MachineInstrBuilder();
@@ -61,6 +68,11 @@ void CSEMIRBuilder::profileDstOp(const DstOp &Op,
case DstOp::DstType::Ty_RC:
B.addNodeIDRegType(Op.getRegClass());
break;
+ case DstOp::DstType::Ty_Reg: {
+ // Regs can have LLT&(RB|RC). If those exist, profile them as well.
+ B.addNodeIDReg(Op.getReg());
+ break;
+ }
default:
B.addNodeIDRegType(Op.getLLTTy(*getMRI()));
break;
@@ -70,6 +82,9 @@ void CSEMIRBuilder::profileDstOp(const DstOp &Op,
void CSEMIRBuilder::profileSrcOp(const SrcOp &Op,
GISelInstProfileBuilder &B) const {
switch (Op.getSrcOpKind()) {
+ case SrcOp::SrcType::Ty_Imm:
+ B.addNodeIDImmediate(static_cast<int64_t>(Op.getImm()));
+ break;
case SrcOp::SrcType::Ty_Predicate:
B.addNodeIDImmediate(static_cast<int64_t>(Op.getPredicate()));
break;
@@ -115,7 +130,7 @@ bool CSEMIRBuilder::checkCopyToDefsPossible(ArrayRef<DstOp> DstOps) {
if (DstOps.size() == 1)
return true; // always possible to emit copy to just 1 vreg.
- return std::all_of(DstOps.begin(), DstOps.end(), [](const DstOp &Op) {
+ return llvm::all_of(DstOps, [](const DstOp &Op) {
DstOp::DstType DT = Op.getDstOpKind();
return DT == DstOp::DstType::Ty_LLT || DT == DstOp::DstType::Ty_RC;
});
@@ -131,6 +146,21 @@ CSEMIRBuilder::generateCopiesIfRequired(ArrayRef<DstOp> DstOps,
if (Op.getDstOpKind() == DstOp::DstType::Ty_Reg)
return buildCopy(Op.getReg(), MIB.getReg(0));
}
+
+ // If we didn't generate a copy then we're re-using an existing node directly
+ // instead of emitting any code. Merge the debug location we wanted to emit
+ // into the instruction we're CSE'ing with. Debug locations aren't part of the
+ // profile, so we don't need to recompute it.
+ if (getDebugLoc()) {
+ GISelChangeObserver *Observer = getState().Observer;
+ if (Observer)
+ Observer->changingInstr(*MIB);
+ MIB->setDebugLoc(
+ DILocation::getMergedLocation(MIB->getDebugLoc(), getDebugLoc()));
+ if (Observer)
+ Observer->changedInstr(*MIB);
+ }
+
return MIB;
}
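
DILocation::getMergedLocation is the standard way to combine two locations that now describe one instruction; when they differ it conservatively produces a location usable for both (typically an artificial line-0 location in the nearest common scope). A reduced sketch of the merge step above, with a hypothetical helper name:

  #include "llvm/IR/DebugInfoMetadata.h"
  #include "llvm/IR/DebugLoc.h"
  using namespace llvm;

  // Sketch: fold the location we intended to emit into an instruction that
  // is being reused by CSE instead of re-emitted.
  static DebugLoc mergeForReuse(const DebugLoc &Existing,
                                const DebugLoc &Wanted) {
    if (!Wanted)
      return Existing; // nothing new to record
    if (!Existing)
      return Wanted;   // the reused instruction had no location yet
    return DILocation::getMergedLocation(Existing, Wanted);
  }
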
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 1be0ca441205..803e1527a4f0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -30,6 +30,51 @@ using namespace llvm;
void CallLowering::anchor() {}
+/// Helper function which updates \p Flags when \p AttrFn returns true.
+static void
+addFlagsUsingAttrFn(ISD::ArgFlagsTy &Flags,
+ const std::function<bool(Attribute::AttrKind)> &AttrFn) {
+ if (AttrFn(Attribute::SExt))
+ Flags.setSExt();
+ if (AttrFn(Attribute::ZExt))
+ Flags.setZExt();
+ if (AttrFn(Attribute::InReg))
+ Flags.setInReg();
+ if (AttrFn(Attribute::StructRet))
+ Flags.setSRet();
+ if (AttrFn(Attribute::Nest))
+ Flags.setNest();
+ if (AttrFn(Attribute::ByVal))
+ Flags.setByVal();
+ if (AttrFn(Attribute::Preallocated))
+ Flags.setPreallocated();
+ if (AttrFn(Attribute::InAlloca))
+ Flags.setInAlloca();
+ if (AttrFn(Attribute::Returned))
+ Flags.setReturned();
+ if (AttrFn(Attribute::SwiftSelf))
+ Flags.setSwiftSelf();
+ if (AttrFn(Attribute::SwiftError))
+ Flags.setSwiftError();
+}
+
+ISD::ArgFlagsTy CallLowering::getAttributesForArgIdx(const CallBase &Call,
+ unsigned ArgIdx) const {
+ ISD::ArgFlagsTy Flags;
+ addFlagsUsingAttrFn(Flags, [&Call, &ArgIdx](Attribute::AttrKind Attr) {
+ return Call.paramHasAttr(ArgIdx, Attr);
+ });
+ return Flags;
+}
+
+void CallLowering::addArgFlagsFromAttributes(ISD::ArgFlagsTy &Flags,
+ const AttributeList &Attrs,
+ unsigned OpIdx) const {
+ addFlagsUsingAttrFn(Flags, [&Attrs, &OpIdx](Attribute::AttrKind Attr) {
+ return Attrs.hasAttribute(OpIdx, Attr);
+ });
+}
+
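
A note on the design choice above: addFlagsUsingAttrFn is deliberately parameterized over a std::function predicate so the single attribute-to-flag table serves both queries, the per-call-site one (CallBase::paramHasAttr, via getAttributesForArgIdx) and the attribute-list one (AttributeList::hasAttribute, via addArgFlagsFromAttributes). A newly supported attribute then only needs to be added in one place.
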
bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
ArrayRef<Register> ResRegs,
ArrayRef<ArrayRef<Register>> ArgRegs,
@@ -37,6 +82,29 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
std::function<unsigned()> GetCalleeReg) const {
CallLoweringInfo Info;
const DataLayout &DL = MIRBuilder.getDataLayout();
+ MachineFunction &MF = MIRBuilder.getMF();
+ bool CanBeTailCalled = CB.isTailCall() &&
+ isInTailCallPosition(CB, MF.getTarget()) &&
+ (MF.getFunction()
+ .getFnAttribute("disable-tail-calls")
+ .getValueAsString() != "true");
+
+ CallingConv::ID CallConv = CB.getCallingConv();
+ Type *RetTy = CB.getType();
+ bool IsVarArg = CB.getFunctionType()->isVarArg();
+
+ SmallVector<BaseArgInfo, 4> SplitArgs;
+ getReturnInfo(CallConv, RetTy, CB.getAttributes(), SplitArgs, DL);
+ Info.CanLowerReturn = canLowerReturn(MF, CallConv, SplitArgs, IsVarArg);
+
+ if (!Info.CanLowerReturn) {
+ // Callee requires sret demotion.
+ insertSRetOutgoingArgument(MIRBuilder, CB, Info);
+
+ // The sret demotion isn't compatible with tail-calls, since the sret
+ // argument points into the caller's stack frame.
+ CanBeTailCalled = false;
+ }
// First step is to marshall all the function's parameters into the correct
// physregs and memory locations. Gather the sequence of argument types that
@@ -44,9 +112,15 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
unsigned i = 0;
unsigned NumFixedArgs = CB.getFunctionType()->getNumParams();
for (auto &Arg : CB.args()) {
- ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{},
+ ArgInfo OrigArg{ArgRegs[i], Arg->getType(), getAttributesForArgIdx(CB, i),
i < NumFixedArgs};
setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CB);
+
+ // If we have an explicit sret argument that is an Instruction (i.e., it
+ // might point to function-local memory), we can't meaningfully tail-call.
+ if (OrigArg.Flags[0].isSRet() && isa<Instruction>(&Arg))
+ CanBeTailCalled = false;
+
Info.OrigArgs.push_back(OrigArg);
++i;
}
@@ -59,21 +133,16 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
else
Info.Callee = MachineOperand::CreateReg(GetCalleeReg(), false);
- Info.OrigRet = ArgInfo{ResRegs, CB.getType(), ISD::ArgFlagsTy{}};
+ Info.OrigRet = ArgInfo{ResRegs, RetTy, ISD::ArgFlagsTy{}};
if (!Info.OrigRet.Ty->isVoidTy())
setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CB);
- MachineFunction &MF = MIRBuilder.getMF();
Info.KnownCallees = CB.getMetadata(LLVMContext::MD_callees);
- Info.CallConv = CB.getCallingConv();
+ Info.CallConv = CallConv;
Info.SwiftErrorVReg = SwiftErrorVReg;
Info.IsMustTailCall = CB.isMustTailCall();
- Info.IsTailCall =
- CB.isTailCall() && isInTailCallPosition(CB, MF.getTarget()) &&
- (MF.getFunction()
- .getFnAttribute("disable-tail-calls")
- .getValueAsString() != "true");
- Info.IsVarArg = CB.getFunctionType()->isVarArg();
+ Info.IsTailCall = CanBeTailCalled;
+ Info.IsVarArg = IsVarArg;
return lowerCall(MIRBuilder, Info);
}
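
A concrete scenario for the two tail-call guards above: suppose the callee's return type is a struct too large for registers, so canLowerReturn fails and an sret pointer into the caller's frame is synthesized; a tail call would reclaim that frame before the callee stores through the pointer, hence CanBeTailCalled is cleared. The explicit-sret check covers the analogous case where the caller already passes, say, an alloca-derived pointer as the sret argument.
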
@@ -83,24 +152,7 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
const FuncInfoTy &FuncInfo) const {
auto &Flags = Arg.Flags[0];
const AttributeList &Attrs = FuncInfo.getAttributes();
- if (Attrs.hasAttribute(OpIdx, Attribute::ZExt))
- Flags.setZExt();
- if (Attrs.hasAttribute(OpIdx, Attribute::SExt))
- Flags.setSExt();
- if (Attrs.hasAttribute(OpIdx, Attribute::InReg))
- Flags.setInReg();
- if (Attrs.hasAttribute(OpIdx, Attribute::StructRet))
- Flags.setSRet();
- if (Attrs.hasAttribute(OpIdx, Attribute::SwiftSelf))
- Flags.setSwiftSelf();
- if (Attrs.hasAttribute(OpIdx, Attribute::SwiftError))
- Flags.setSwiftError();
- if (Attrs.hasAttribute(OpIdx, Attribute::ByVal))
- Flags.setByVal();
- if (Attrs.hasAttribute(OpIdx, Attribute::Preallocated))
- Flags.setPreallocated();
- if (Attrs.hasAttribute(OpIdx, Attribute::InAlloca))
- Flags.setInAlloca();
+ addArgFlagsFromAttributes(Flags, Attrs, OpIdx);
if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) {
Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType();
@@ -117,8 +169,6 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
FrameAlign = Align(getTLI()->getByValTypeAlignment(ElementTy, DL));
Flags.setByValAlign(FrameAlign);
}
- if (Attrs.hasAttribute(OpIdx, Attribute::Nest))
- Flags.setNest();
Flags.setOrigAlign(DL.getABITypeAlign(Arg.Ty));
}
@@ -195,99 +245,97 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
unsigned NumArgs = Args.size();
for (unsigned i = 0; i != NumArgs; ++i) {
EVT CurVT = EVT::getEVT(Args[i].Ty);
- if (!CurVT.isSimple() ||
- Handler.assignArg(i, CurVT.getSimpleVT(), CurVT.getSimpleVT(),
- CCValAssign::Full, Args[i], Args[i].Flags[0],
- CCInfo)) {
- MVT NewVT = TLI->getRegisterTypeForCallingConv(
- F.getContext(), F.getCallingConv(), EVT(CurVT));
-
- // If we need to split the type over multiple regs, check it's a scenario
- // we currently support.
- unsigned NumParts = TLI->getNumRegistersForCallingConv(
- F.getContext(), F.getCallingConv(), CurVT);
- if (NumParts > 1) {
- // For now only handle exact splits.
- if (NewVT.getSizeInBits() * NumParts != CurVT.getSizeInBits())
- return false;
- }
+ if (CurVT.isSimple() &&
+ !Handler.assignArg(i, CurVT.getSimpleVT(), CurVT.getSimpleVT(),
+ CCValAssign::Full, Args[i], Args[i].Flags[0],
+ CCInfo))
+ continue;
+
+ MVT NewVT = TLI->getRegisterTypeForCallingConv(
+ F.getContext(), F.getCallingConv(), EVT(CurVT));
+
+ // If we need to split the type over multiple regs, check it's a scenario
+ // we currently support.
+ unsigned NumParts = TLI->getNumRegistersForCallingConv(
+ F.getContext(), F.getCallingConv(), CurVT);
+
+ if (NumParts == 1) {
+ // Try to use the register type if we couldn't assign the VT.
+ if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i],
+ Args[i].Flags[0], CCInfo))
+ return false;
+ continue;
+ }
- // For incoming arguments (physregs to vregs), we could have values in
- // physregs (or memlocs) which we want to extract and copy to vregs.
- // During this, we might have to deal with the LLT being split across
- // multiple regs, so we have to record this information for later.
- //
- // If we have outgoing args, then we have the opposite case. We have a
- // vreg with an LLT which we want to assign to a physical location, and
- // we might have to record that the value has to be split later.
- if (Handler.isIncomingArgumentHandler()) {
- if (NumParts == 1) {
- // Try to use the register type if we couldn't assign the VT.
- if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i],
- Args[i].Flags[0], CCInfo))
- return false;
+ assert(NumParts > 1);
+ // For now only handle exact splits.
+ if (NewVT.getSizeInBits() * NumParts != CurVT.getSizeInBits())
+ return false;
+
+ // For incoming arguments (physregs to vregs), we could have values in
+ // physregs (or memlocs) which we want to extract and copy to vregs.
+ // During this, we might have to deal with the LLT being split across
+ // multiple regs, so we have to record this information for later.
+ //
+ // If we have outgoing args, then we have the opposite case. We have a
+ // vreg with an LLT which we want to assign to a physical location, and
+ // we might have to record that the value has to be split later.
+ if (Handler.isIncomingArgumentHandler()) {
+ // We're handling an incoming arg which is split over multiple regs.
+ // E.g. passing an s128 on AArch64.
+ ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0];
+ Args[i].OrigRegs.push_back(Args[i].Regs[0]);
+ Args[i].Regs.clear();
+ Args[i].Flags.clear();
+ LLT NewLLT = getLLTForMVT(NewVT);
+ // For each split register, create and assign a vreg that will store
+ // the incoming component of the larger value. These will later be
+ // merged to form the final vreg.
+ for (unsigned Part = 0; Part < NumParts; ++Part) {
+ Register Reg =
+ MIRBuilder.getMRI()->createGenericVirtualRegister(NewLLT);
+ ISD::ArgFlagsTy Flags = OrigFlags;
+ if (Part == 0) {
+ Flags.setSplit();
} else {
- // We're handling an incoming arg which is split over multiple regs.
- // E.g. passing an s128 on AArch64.
- ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0];
- Args[i].OrigRegs.push_back(Args[i].Regs[0]);
- Args[i].Regs.clear();
- Args[i].Flags.clear();
- LLT NewLLT = getLLTForMVT(NewVT);
- // For each split register, create and assign a vreg that will store
- // the incoming component of the larger value. These will later be
- // merged to form the final vreg.
- for (unsigned Part = 0; Part < NumParts; ++Part) {
- Register Reg =
- MIRBuilder.getMRI()->createGenericVirtualRegister(NewLLT);
- ISD::ArgFlagsTy Flags = OrigFlags;
- if (Part == 0) {
- Flags.setSplit();
- } else {
- Flags.setOrigAlign(Align(1));
- if (Part == NumParts - 1)
- Flags.setSplitEnd();
- }
- Args[i].Regs.push_back(Reg);
- Args[i].Flags.push_back(Flags);
- if (Handler.assignArg(i + Part, NewVT, NewVT, CCValAssign::Full,
- Args[i], Args[i].Flags[Part], CCInfo)) {
- // Still couldn't assign this smaller part type for some reason.
- return false;
- }
- }
+ Flags.setOrigAlign(Align(1));
+ if (Part == NumParts - 1)
+ Flags.setSplitEnd();
}
- } else {
- // Handling an outgoing arg that might need to be split.
- if (NumParts < 2)
- return false; // Don't know how to deal with this type combination.
-
- // This type is passed via multiple registers in the calling convention.
- // We need to extract the individual parts.
- Register LargeReg = Args[i].Regs[0];
- LLT SmallTy = LLT::scalar(NewVT.getSizeInBits());
- auto Unmerge = MIRBuilder.buildUnmerge(SmallTy, LargeReg);
- assert(Unmerge->getNumOperands() == NumParts + 1);
- ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0];
- // We're going to replace the regs and flags with the split ones.
- Args[i].Regs.clear();
- Args[i].Flags.clear();
- for (unsigned PartIdx = 0; PartIdx < NumParts; ++PartIdx) {
- ISD::ArgFlagsTy Flags = OrigFlags;
- if (PartIdx == 0) {
- Flags.setSplit();
- } else {
- Flags.setOrigAlign(Align(1));
- if (PartIdx == NumParts - 1)
- Flags.setSplitEnd();
- }
- Args[i].Regs.push_back(Unmerge.getReg(PartIdx));
- Args[i].Flags.push_back(Flags);
- if (Handler.assignArg(i + PartIdx, NewVT, NewVT, CCValAssign::Full,
- Args[i], Args[i].Flags[PartIdx], CCInfo))
- return false;
+ Args[i].Regs.push_back(Reg);
+ Args[i].Flags.push_back(Flags);
+ if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i],
+ Args[i].Flags[Part], CCInfo)) {
+ // Still couldn't assign this smaller part type for some reason.
+ return false;
}
}
+ } else {
+ // This type is passed via multiple registers in the calling convention.
+ // We need to extract the individual parts.
+ Register LargeReg = Args[i].Regs[0];
+ LLT SmallTy = LLT::scalar(NewVT.getSizeInBits());
+ auto Unmerge = MIRBuilder.buildUnmerge(SmallTy, LargeReg);
+ assert(Unmerge->getNumOperands() == NumParts + 1);
+ ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0];
+ // We're going to replace the regs and flags with the split ones.
+ Args[i].Regs.clear();
+ Args[i].Flags.clear();
+ for (unsigned PartIdx = 0; PartIdx < NumParts; ++PartIdx) {
+ ISD::ArgFlagsTy Flags = OrigFlags;
+ if (PartIdx == 0) {
+ Flags.setSplit();
+ } else {
+ Flags.setOrigAlign(Align(1));
+ if (PartIdx == NumParts - 1)
+ Flags.setSplitEnd();
+ }
+ Args[i].Regs.push_back(Unmerge.getReg(PartIdx));
+ Args[i].Flags.push_back(Flags);
+ if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full,
+ Args[i], Args[i].Flags[PartIdx], CCInfo))
+ return false;
+ }
}
}
@@ -313,85 +361,239 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
EVT VAVT = VA.getValVT();
const LLT OrigTy = getLLTForType(*Args[i].Ty, DL);
- if (VA.isRegLoc()) {
- if (Handler.isIncomingArgumentHandler() && VAVT != OrigVT) {
- if (VAVT.getSizeInBits() < OrigVT.getSizeInBits()) {
- // Expected to be multiple regs for a single incoming arg.
- unsigned NumArgRegs = Args[i].Regs.size();
- if (NumArgRegs < 2)
- return false;
-
- assert((j + (NumArgRegs - 1)) < ArgLocs.size() &&
- "Too many regs for number of args");
- for (unsigned Part = 0; Part < NumArgRegs; ++Part) {
- // There should be Regs.size() ArgLocs per argument.
- VA = ArgLocs[j + Part];
- Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA);
- }
- j += NumArgRegs - 1;
- // Merge the split registers into the expected larger result vreg
- // of the original call.
- MIRBuilder.buildMerge(Args[i].OrigRegs[0], Args[i].Regs);
- continue;
- }
- const LLT VATy(VAVT.getSimpleVT());
- Register NewReg =
- MIRBuilder.getMRI()->createGenericVirtualRegister(VATy);
- Handler.assignValueToReg(NewReg, VA.getLocReg(), VA);
- // If it's a vector type, we either need to truncate the elements
- // or do an unmerge to get the lower block of elements.
- if (VATy.isVector() &&
- VATy.getNumElements() > OrigVT.getVectorNumElements()) {
- // Just handle the case where the VA type is 2 * original type.
- if (VATy.getNumElements() != OrigVT.getVectorNumElements() * 2) {
- LLVM_DEBUG(dbgs()
- << "Incoming promoted vector arg has too many elts");
- return false;
- }
- auto Unmerge = MIRBuilder.buildUnmerge({OrigTy, OrigTy}, {NewReg});
- MIRBuilder.buildCopy(ArgReg, Unmerge.getReg(0));
- } else {
- MIRBuilder.buildTrunc(ArgReg, {NewReg}).getReg(0);
+ // Expected to be multiple regs for a single incoming arg.
+ // There should be Regs.size() ArgLocs per argument.
+ unsigned NumArgRegs = Args[i].Regs.size();
+
+ assert((j + (NumArgRegs - 1)) < ArgLocs.size() &&
+ "Too many regs for number of args");
+ for (unsigned Part = 0; Part < NumArgRegs; ++Part) {
+ VA = ArgLocs[j + Part];
+ if (VA.isMemLoc()) {
+ // Don't currently support loading/storing a type that needs to be split
+ // to the stack. Should be easy, just not implemented yet.
+ if (NumArgRegs > 1) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Load/store a split arg to/from the stack not implemented yet\n");
+ return false;
}
- } else if (!Handler.isIncomingArgumentHandler()) {
- assert((j + (Args[i].Regs.size() - 1)) < ArgLocs.size() &&
- "Too many regs for number of args");
- // This is an outgoing argument that might have been split.
- for (unsigned Part = 0; Part < Args[i].Regs.size(); ++Part) {
- // There should be Regs.size() ArgLocs per argument.
- VA = ArgLocs[j + Part];
- Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA);
+
+ // FIXME: Use correct address space for pointer size
+ EVT LocVT = VA.getValVT();
+ unsigned MemSize = LocVT == MVT::iPTR ? DL.getPointerSize()
+ : LocVT.getStoreSize();
+ unsigned Offset = VA.getLocMemOffset();
+ MachinePointerInfo MPO;
+ Register StackAddr = Handler.getStackAddress(MemSize, Offset, MPO);
+ Handler.assignValueToAddress(Args[i], StackAddr,
+ MemSize, MPO, VA);
+ continue;
+ }
+
+ assert(VA.isRegLoc() && "custom loc should have been handled already");
+
+ // GlobalISel does not currently work for scalable vectors.
+ if (OrigVT.getFixedSizeInBits() >= VAVT.getFixedSizeInBits() ||
+ !Handler.isIncomingArgumentHandler()) {
+ // This is an argument that might have been split. There should be
+ // Regs.size() ArgLocs per argument.
+
+ // Insert the argument copies. If VAVT < OrigVT, we'll insert the merge
+ // to the original register after handling all of the parts.
+ Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA);
+ continue;
+ }
+
+ // This ArgLoc covers multiple pieces, so we need to split it.
+ const LLT VATy(VAVT.getSimpleVT());
+ Register NewReg =
+ MIRBuilder.getMRI()->createGenericVirtualRegister(VATy);
+ Handler.assignValueToReg(NewReg, VA.getLocReg(), VA);
+ // If it's a vector type, we either need to truncate the elements
+ // or do an unmerge to get the lower block of elements.
+ if (VATy.isVector() &&
+ VATy.getNumElements() > OrigVT.getVectorNumElements()) {
+ // Just handle the case where the VA type is 2 * original type.
+ if (VATy.getNumElements() != OrigVT.getVectorNumElements() * 2) {
+ LLVM_DEBUG(dbgs()
+ << "Incoming promoted vector arg has too many elts");
+ return false;
}
- j += Args[i].Regs.size() - 1;
+ auto Unmerge = MIRBuilder.buildUnmerge({OrigTy, OrigTy}, {NewReg});
+ MIRBuilder.buildCopy(ArgReg, Unmerge.getReg(0));
} else {
- Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
+ MIRBuilder.buildTrunc(ArgReg, {NewReg}).getReg(0);
}
- } else if (VA.isMemLoc()) {
- // Don't currently support loading/storing a type that needs to be split
- // to the stack. Should be easy, just not implemented yet.
- if (Args[i].Regs.size() > 1) {
- LLVM_DEBUG(
- dbgs()
- << "Load/store a split arg to/from the stack not implemented yet");
- return false;
+ }
+
+    // Now that all pieces have been handled, re-pack the arguments into
+    // their wider, original registers.
+ if (Handler.isIncomingArgumentHandler()) {
+ if (VAVT.getFixedSizeInBits() < OrigVT.getFixedSizeInBits()) {
+ assert(NumArgRegs >= 2);
+
+ // Merge the split registers into the expected larger result vreg
+ // of the original call.
+ MIRBuilder.buildMerge(Args[i].OrigRegs[0], Args[i].Regs);
}
+ }
- EVT LocVT = VA.getValVT();
- unsigned MemSize = LocVT == MVT::iPTR ? DL.getPointerSize()
- : LocVT.getStoreSize();
+ j += NumArgRegs - 1;
+ }
- unsigned Offset = VA.getLocMemOffset();
- MachinePointerInfo MPO;
- Register StackAddr = Handler.getStackAddress(MemSize, Offset, MPO);
- Handler.assignValueToAddress(Args[i], StackAddr, MemSize, MPO, VA);
- } else {
- // FIXME: Support byvals and other weirdness
+ return true;
+}
+
+void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy,
+ ArrayRef<Register> VRegs, Register DemoteReg,
+ int FI) const {
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const DataLayout &DL = MF.getDataLayout();
+
+ SmallVector<EVT, 4> SplitVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0);
+
+ assert(VRegs.size() == SplitVTs.size());
+
+ unsigned NumValues = SplitVTs.size();
+ Align BaseAlign = DL.getPrefTypeAlign(RetTy);
+ Type *RetPtrTy = RetTy->getPointerTo(DL.getAllocaAddrSpace());
+ LLT OffsetLLTy = getLLTForType(*DL.getIntPtrType(RetPtrTy), DL);
+
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
+
+ for (unsigned I = 0; I < NumValues; ++I) {
+ Register Addr;
+ MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]);
+ auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
+ MRI.getType(VRegs[I]).getSizeInBytes(),
+ commonAlignment(BaseAlign, Offsets[I]));
+ MIRBuilder.buildLoad(VRegs[I], Addr, *MMO);
+ }
+}
+
+void CallLowering::insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy,
+ ArrayRef<Register> VRegs,
+ Register DemoteReg) const {
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const DataLayout &DL = MF.getDataLayout();
+
+ SmallVector<EVT, 4> SplitVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(*TLI, DL, RetTy, SplitVTs, &Offsets, 0);
+
+ assert(VRegs.size() == SplitVTs.size());
+
+ unsigned NumValues = SplitVTs.size();
+ Align BaseAlign = DL.getPrefTypeAlign(RetTy);
+ unsigned AS = DL.getAllocaAddrSpace();
+ LLT OffsetLLTy =
+ getLLTForType(*DL.getIntPtrType(RetTy->getPointerTo(AS)), DL);
+
+ MachinePointerInfo PtrInfo(AS);
+
+ for (unsigned I = 0; I < NumValues; ++I) {
+ Register Addr;
+ MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]);
+ auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
+ MRI.getType(VRegs[I]).getSizeInBytes(),
+ commonAlignment(BaseAlign, Offsets[I]));
+ MIRBuilder.buildStore(VRegs[I], Addr, *MMO);
+ }
+}
+
+void CallLowering::insertSRetIncomingArgument(
+ const Function &F, SmallVectorImpl<ArgInfo> &SplitArgs, Register &DemoteReg,
+ MachineRegisterInfo &MRI, const DataLayout &DL) const {
+ unsigned AS = DL.getAllocaAddrSpace();
+ DemoteReg = MRI.createGenericVirtualRegister(
+ LLT::pointer(AS, DL.getPointerSizeInBits(AS)));
+
+ Type *PtrTy = PointerType::get(F.getReturnType(), AS);
+
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(*TLI, DL, PtrTy, ValueVTs);
+
+ // NOTE: Assume that a pointer won't get split into more than one VT.
+ assert(ValueVTs.size() == 1);
+
+ ArgInfo DemoteArg(DemoteReg, ValueVTs[0].getTypeForEVT(PtrTy->getContext()));
+ setArgFlags(DemoteArg, AttributeList::ReturnIndex, DL, F);
+ DemoteArg.Flags[0].setSRet();
+ SplitArgs.insert(SplitArgs.begin(), DemoteArg);
+}
+
+void CallLowering::insertSRetOutgoingArgument(MachineIRBuilder &MIRBuilder,
+ const CallBase &CB,
+ CallLoweringInfo &Info) const {
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ Type *RetTy = CB.getType();
+ unsigned AS = DL.getAllocaAddrSpace();
+ LLT FramePtrTy = LLT::pointer(AS, DL.getPointerSizeInBits(AS));
+
+ int FI = MIRBuilder.getMF().getFrameInfo().CreateStackObject(
+ DL.getTypeAllocSize(RetTy), DL.getPrefTypeAlign(RetTy), false);
+
+ Register DemoteReg = MIRBuilder.buildFrameIndex(FramePtrTy, FI).getReg(0);
+ ArgInfo DemoteArg(DemoteReg, PointerType::get(RetTy, AS));
+ setArgFlags(DemoteArg, AttributeList::ReturnIndex, DL, CB);
+ DemoteArg.Flags[0].setSRet();
+
+ Info.OrigArgs.insert(Info.OrigArgs.begin(), DemoteArg);
+ Info.DemoteStackIndex = FI;
+ Info.DemoteRegister = DemoteReg;
+}
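+
+// An illustrative sketch of the sret demotion the helpers above implement
+// (the IR below is hypothetical, not taken from this change): a call such as
+//   %r = call { i64, i64 } @f()
+// whose aggregate result cannot be returned in registers is rewritten so the
+// caller allocates a stack slot and passes its address as a hidden sret
+// pointer argument:
+//   %slot = alloca { i64, i64 }
+//   call void @f({ i64, i64 }* sret %slot)
+// insertSRetLoads then reloads the pieces of %r from %slot, while on the
+// callee side insertSRetStores writes the return pieces through the pointer.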
+
+bool CallLowering::checkReturn(CCState &CCInfo,
+ SmallVectorImpl<BaseArgInfo> &Outs,
+ CCAssignFn *Fn) const {
+ for (unsigned I = 0, E = Outs.size(); I < E; ++I) {
+ MVT VT = MVT::getVT(Outs[I].Ty);
+ if (Fn(I, VT, VT, CCValAssign::Full, Outs[I].Flags[0], CCInfo))
return false;
- }
}
return true;
}
+void CallLowering::getReturnInfo(CallingConv::ID CallConv, Type *RetTy,
+ AttributeList Attrs,
+ SmallVectorImpl<BaseArgInfo> &Outs,
+ const DataLayout &DL) const {
+ LLVMContext &Context = RetTy->getContext();
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+
+ SmallVector<EVT, 4> SplitVTs;
+ ComputeValueVTs(*TLI, DL, RetTy, SplitVTs);
+ addArgFlagsFromAttributes(Flags, Attrs, AttributeList::ReturnIndex);
+
+ for (EVT VT : SplitVTs) {
+ unsigned NumParts =
+ TLI->getNumRegistersForCallingConv(Context, CallConv, VT);
+ MVT RegVT = TLI->getRegisterTypeForCallingConv(Context, CallConv, VT);
+ Type *PartTy = EVT(RegVT).getTypeForEVT(Context);
+
+ for (unsigned I = 0; I < NumParts; ++I) {
+ Outs.emplace_back(PartTy, Flags);
+ }
+ }
+}
+
+bool CallLowering::checkReturnTypeForCallConv(MachineFunction &MF) const {
+ const auto &F = MF.getFunction();
+ Type *ReturnType = F.getReturnType();
+ CallingConv::ID CallConv = F.getCallingConv();
+
+ SmallVector<BaseArgInfo, 4> SplitArgs;
+ getReturnInfo(CallConv, ReturnType, F.getAttributes(), SplitArgs,
+ MF.getDataLayout());
+ return canLowerReturn(MF, CallConv, SplitArgs, F.isVarArg());
+}
+
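+// For example (illustrative only; the exact split is target-defined): for a
+// function returning i128 on a target that returns it as two i64 parts,
+// ComputeValueVTs yields a single i128 VT, getNumRegistersForCallingConv
+// reports 2, and getReturnInfo above emits two i64 BaseArgInfo entries,
+// which checkReturnTypeForCallConv then hands to canLowerReturn.
+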
bool CallLowering::analyzeArgInfo(CCState &CCState,
SmallVectorImpl<ArgInfo> &Args,
CCAssignFn &AssignFnFixed,
@@ -409,6 +611,58 @@ bool CallLowering::analyzeArgInfo(CCState &CCState,
return true;
}
+bool CallLowering::parametersInCSRMatch(
+ const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask,
+ const SmallVectorImpl<CCValAssign> &OutLocs,
+ const SmallVectorImpl<ArgInfo> &OutArgs) const {
+ for (unsigned i = 0; i < OutLocs.size(); ++i) {
+ auto &ArgLoc = OutLocs[i];
+ // If it's not a register, it's fine.
+ if (!ArgLoc.isRegLoc())
+ continue;
+
+ MCRegister PhysReg = ArgLoc.getLocReg();
+
+ // Only look at callee-saved registers.
+ if (MachineOperand::clobbersPhysReg(CallerPreservedMask, PhysReg))
+ continue;
+
+ LLVM_DEBUG(
+ dbgs()
+ << "... Call has an argument passed in a callee-saved register.\n");
+
+ // Check if it was copied from.
+ const ArgInfo &OutInfo = OutArgs[i];
+
+ if (OutInfo.Regs.size() > 1) {
+ LLVM_DEBUG(
+ dbgs() << "... Cannot handle arguments in multiple registers.\n");
+ return false;
+ }
+
+ // Check if we copy the register, walking through copies from virtual
+ // registers. Note that getDefIgnoringCopies does not ignore copies from
+ // physical registers.
+ MachineInstr *RegDef = getDefIgnoringCopies(OutInfo.Regs[0], MRI);
+ if (!RegDef || RegDef->getOpcode() != TargetOpcode::COPY) {
+ LLVM_DEBUG(
+ dbgs()
+ << "... Parameter was not copied into a VReg, cannot tail call.\n");
+ return false;
+ }
+
+ // Got a copy. Verify that it's the same as the register we want.
+ Register CopyRHS = RegDef->getOperand(1).getReg();
+ if (CopyRHS != PhysReg) {
+ LLVM_DEBUG(dbgs() << "... Callee-saved register was not copied into "
+ "VReg, cannot tail call.\n");
+ return false;
+ }
+ }
+
+ return true;
+}
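+
+// Illustrative MIR for the check above (register names are hypothetical,
+// using an AArch64-style callee-saved register for the example): forwarding
+// a callee-saved register unchanged is fine,
+//   %0:_(s64) = COPY $x19   ; outgoing arg assigned to callee-saved $x19
+//   ...
+//   $x19 = COPY %0          ; the same value flows back into $x19
+// but if %0 were defined by anything other than a COPY from $x19, the
+// callee-saved register would be clobbered and the tail call is rejected.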
+
bool CallLowering::resultsCompatible(CallLoweringInfo &Info,
MachineFunction &MF,
SmallVectorImpl<ArgInfo> &InArgs,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
index b4562a5c6601..f1071d96e5a3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -130,8 +130,6 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF,
WrapperObserver.addObserver(CSEInfo);
RAIIDelegateInstaller DelInstall(MF, &WrapperObserver);
for (MachineBasicBlock *MBB : post_order(&MF)) {
- if (MBB->empty())
- continue;
for (auto MII = MBB->rbegin(), MIE = MBB->rend(); MII != MIE;) {
MachineInstr *CurMI = &*MII;
++MII;
@@ -155,5 +153,8 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF,
MFChanged |= Changed;
} while (Changed);
+ assert(!CSEInfo || (!errorToBool(CSEInfo->verify()) &&
+ "CSEInfo is not consistent. Likely missing calls to "
+ "observer on mutations"));
return MFChanged;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 194961ae3b21..df0219fcfa64 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -16,6 +16,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -34,7 +35,6 @@ static cl::opt<bool>
cl::desc("Force all indexed operations to be "
"legal for the GlobalISel combiner"));
-
CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
MachineIRBuilder &B, GISelKnownBits *KB,
MachineDominatorTree *MDT,
@@ -44,6 +44,75 @@ CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
(void)this->KB;
}
+const TargetLowering &CombinerHelper::getTargetLowering() const {
+ return *Builder.getMF().getSubtarget().getTargetLowering();
+}
+
+/// \returns The little endian in-memory byte position of byte \p I in a
+/// \p ByteWidth bytes wide type.
+///
+/// E.g. Given a 4-byte type x, x[0] -> byte 0
+static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
+ assert(I < ByteWidth && "I must be in [0, ByteWidth)");
+ return I;
+}
+
+/// \returns The big endian in-memory byte position of byte \p I in a
+/// \p ByteWidth bytes wide type.
+///
+/// E.g. Given a 4-byte type x, x[0] -> byte 3
+static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
+ assert(I < ByteWidth && "I must be in [0, ByteWidth)");
+ return ByteWidth - I - 1;
+}
+
+/// Given a map from byte offsets in memory to indices in a load/store,
+/// determine if that map corresponds to a little or big endian byte pattern.
+///
+/// \param MemOffset2Idx maps memory offsets to address offsets.
+/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
+///
+/// \returns true if the map corresponds to a big endian byte pattern, false
+/// if it corresponds to a little endian byte pattern, and None otherwise.
+///
+/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
+/// are as follows:
+///
+/// AddrOffset Little endian Big endian
+/// 0 0 3
+/// 1 1 2
+/// 2 2 1
+/// 3 3 0
+static Optional<bool>
+isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
+ int64_t LowestIdx) {
+ // Need at least two byte positions to decide on endianness.
+ unsigned Width = MemOffset2Idx.size();
+ if (Width < 2)
+ return None;
+ bool BigEndian = true, LittleEndian = true;
+  for (unsigned MemOffset = 0; MemOffset < Width; ++MemOffset) {
+ auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
+ if (MemOffsetAndIdx == MemOffset2Idx.end())
+ return None;
+ const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
+ assert(Idx >= 0 && "Expected non-negative byte offset?");
+ LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
+ BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
+ if (!BigEndian && !LittleEndian)
+ return None;
+ }
+
+ assert((BigEndian != LittleEndian) &&
+ "Pattern cannot be both big and little endian!");
+ return BigEndian;
+}
+
+bool CombinerHelper::isLegalOrBeforeLegalizer(
+ const LegalityQuery &Query) const {
+ return !LI || LI->getAction(Query).Action == LegalizeActions::Legal;
+}
+
void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
Register ToReg) const {
Observer.changingAllUsesOfReg(MRI, FromReg);
@@ -555,13 +624,13 @@ bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
assert(DefMI.getParent() == UseMI.getParent());
if (&DefMI == &UseMI)
return false;
-
- // Loop through the basic block until we find one of the instructions.
- MachineBasicBlock::const_iterator I = DefMI.getParent()->begin();
- for (; &*I != &DefMI && &*I != &UseMI; ++I)
- return &*I == &DefMI;
-
- llvm_unreachable("Block must contain instructions");
+ const MachineBasicBlock &MBB = *DefMI.getParent();
+ auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
+ return &MI == &DefMI || &MI == &UseMI;
+ });
+ if (DefOrUse == MBB.end())
+ llvm_unreachable("Block must contain both DefMI and UseMI!");
+ return &*DefOrUse == &DefMI;
}
bool CombinerHelper::dominates(const MachineInstr &DefMI,
@@ -576,20 +645,97 @@ bool CombinerHelper::dominates(const MachineInstr &DefMI,
return isPredecessor(DefMI, UseMI);
}
-bool CombinerHelper::matchSextAlreadyExtended(MachineInstr &MI) {
+bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
Register SrcReg = MI.getOperand(1).getReg();
- unsigned SrcSignBits = KB->computeNumSignBits(SrcReg);
- unsigned NumSextBits =
- MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() -
- MI.getOperand(2).getImm();
- return SrcSignBits >= NumSextBits;
+ Register LoadUser = SrcReg;
+
+ if (MRI.getType(SrcReg).isVector())
+ return false;
+
+ Register TruncSrc;
+ if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
+ LoadUser = TruncSrc;
+
+ uint64_t SizeInBits = MI.getOperand(2).getImm();
+ // If the source is a G_SEXTLOAD from the same bit width, then we don't
+ // need any extend at all, just a truncate.
+ if (auto *LoadMI = getOpcodeDef(TargetOpcode::G_SEXTLOAD, LoadUser, MRI)) {
+ const auto &MMO = **LoadMI->memoperands_begin();
+ // If truncating more than the original extended value, abort.
+ if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < MMO.getSizeInBits())
+ return false;
+ if (MMO.getSizeInBits() == SizeInBits)
+ return true;
+ }
+ return false;
}
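+
+// For example (illustrative MIR): when the load already sign-extends from
+// the same width,
+//   %ld:_(s32) = G_SEXTLOAD %ptr :: (load 2)
+//   %ext:_(s32) = G_SEXT_INREG %ld, 16
+// the G_SEXT_INREG is redundant, and applySextTruncSextLoad replaces it
+// with a plain copy of %ld.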
-bool CombinerHelper::applySextAlreadyExtended(MachineInstr &MI) {
+bool CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
- MachineIRBuilder MIB(MI);
- MIB.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchSextInRegOfLoad(
+ MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
+
+ // Only supports scalars for now.
+ if (MRI.getType(MI.getOperand(0).getReg()).isVector())
+ return false;
+
+ Register SrcReg = MI.getOperand(1).getReg();
+ MachineInstr *LoadDef = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
+ if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg()))
+ return false;
+
+ // If the sign extend extends from a narrower width than the load's width,
+ // then we can narrow the load width when we combine to a G_SEXTLOAD.
+ auto &MMO = **LoadDef->memoperands_begin();
+ // Don't do this for non-simple loads.
+ if (MMO.isAtomic() || MMO.isVolatile())
+ return false;
+
+ // Avoid widening the load at all.
+ unsigned NewSizeBits =
+ std::min((uint64_t)MI.getOperand(2).getImm(), MMO.getSizeInBits());
+
+ // Don't generate G_SEXTLOADs with a < 1 byte width.
+ if (NewSizeBits < 8)
+ return false;
+  // Don't bother creating a non-power-of-2 sextload; it will likely be
+  // broken up anyway for most targets.
+ if (!isPowerOf2_32(NewSizeBits))
+ return false;
+ MatchInfo = std::make_tuple(LoadDef->getOperand(0).getReg(), NewSizeBits);
+ return true;
+}
+
+bool CombinerHelper::applySextInRegOfLoad(
+ MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
+ Register LoadReg;
+ unsigned ScalarSizeBits;
+ std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
+ auto *LoadDef = MRI.getVRegDef(LoadReg);
+ assert(LoadDef && "Expected a load reg");
+
+ // If we have the following:
+ // %ld = G_LOAD %ptr, (load 2)
+ // %ext = G_SEXT_INREG %ld, 8
+ // ==>
+ // %ld = G_SEXTLOAD %ptr (load 1)
+
+ auto &MMO = **LoadDef->memoperands_begin();
+ Builder.setInstrAndDebugLoc(MI);
+ auto &MF = Builder.getMF();
+ auto PtrInfo = MMO.getPointerInfo();
+ auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
+ Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
+ LoadDef->getOperand(1).getReg(), *NewMMO);
MI.eraseFromParent();
return true;
}
@@ -611,7 +757,7 @@ bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
return false;
LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
-
+  // FIXME: The following use traversal needs a bail-out for pathological cases.
for (auto &Use : MRI.use_nodbg_instructions(Base)) {
if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
continue;
@@ -738,6 +884,11 @@ bool CombinerHelper::matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadS
Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
return false;
+ // For now, no targets actually support these opcodes so don't waste time
+ // running these unless we're forced to for testing.
+ if (!ForceLegalIndexing)
+ return false;
+
MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
MatchInfo.Offset);
if (!MatchInfo.IsPre &&
@@ -790,14 +941,12 @@ void CombinerHelper::applyCombineIndexedLoadStore(
    LLVM_DEBUG(dbgs() << "    Combined to indexed operation");
}
-bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
+bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) {
if (MI.getOpcode() != TargetOpcode::G_BR)
return false;
// Try to match the following:
// bb1:
- // %c(s32) = G_ICMP pred, %a, %b
- // %c1(s1) = G_TRUNC %c(s32)
// G_BRCOND %c1, %bb2
// G_BR %bb3
// bb2:
@@ -807,7 +956,7 @@ bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
// The above pattern does not have a fall through to the successor bb2, always
// resulting in a branch no matter which path is taken. Here we try to find
// and replace that pattern with conditional branch to bb3 and otherwise
- // fallthrough to bb2.
+ // fallthrough to bb2. This is generally better for branch predictors.
MachineBasicBlock *MBB = MI.getParent();
MachineBasicBlock::iterator BrIt(MI);
@@ -822,40 +971,34 @@ bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
// Check that the next block is the conditional branch target.
if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB()))
return false;
-
- MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
- if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP ||
- !MRI.hasOneNonDBGUse(CmpMI->getOperand(0).getReg()))
- return false;
return true;
}
-bool CombinerHelper::tryElideBrByInvertingCond(MachineInstr &MI) {
- if (!matchElideBrByInvertingCond(MI))
- return false;
- applyElideBrByInvertingCond(MI);
- return true;
-}
-
-void CombinerHelper::applyElideBrByInvertingCond(MachineInstr &MI) {
+void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI) {
MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
MachineBasicBlock::iterator BrIt(MI);
MachineInstr *BrCond = &*std::prev(BrIt);
- MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
- CmpInst::Predicate InversePred = CmpInst::getInversePredicate(
- (CmpInst::Predicate)CmpMI->getOperand(1).getPredicate());
+ Builder.setInstrAndDebugLoc(*BrCond);
+ LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
+ // FIXME: Does int/fp matter for this? If so, we might need to restrict
+ // this to i1 only since we might not know for sure what kind of
+ // compare generated the condition value.
+ auto True = Builder.buildConstant(
+ Ty, getICmpTrueVal(getTargetLowering(), false, false));
+ auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
- // Invert the G_ICMP condition.
- Observer.changingInstr(*CmpMI);
- CmpMI->getOperand(1).setPredicate(InversePred);
- Observer.changedInstr(*CmpMI);
+ auto *FallthroughBB = BrCond->getOperand(1).getMBB();
+ Observer.changingInstr(MI);
+ MI.getOperand(0).setMBB(FallthroughBB);
+ Observer.changedInstr(MI);
- // Change the conditional branch target.
+ // Change the conditional branch to use the inverted condition and
+ // new target block.
Observer.changingInstr(*BrCond);
+ BrCond->getOperand(0).setReg(Xor.getReg(0));
BrCond->getOperand(1).setMBB(BrTarget);
Observer.changedInstr(*BrCond);
- MI.eraseFromParent();
}
static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
@@ -946,8 +1089,7 @@ static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
unsigned NumBits = Ty.getScalarSizeInBits();
auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
if (!Ty.isVector() && ValVRegAndVal) {
- unsigned KnownVal = ValVRegAndVal->Value;
- APInt Scalar = APInt(8, KnownVal);
+ APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8);
APInt SplatVal = APInt::getSplat(NumBits, Scalar);
return MIB.buildConstant(Ty, SplatVal).getReg(0);
}
@@ -1299,13 +1441,11 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
}
bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
+ const unsigned Opc = MI.getOpcode();
// This combine is fairly complex so it's not written with a separate
// matcher function.
- assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
- Intrinsic::ID ID = (Intrinsic::ID)MI.getIntrinsicID();
- assert((ID == Intrinsic::memcpy || ID == Intrinsic::memmove ||
- ID == Intrinsic::memset) &&
- "Expected a memcpy like intrinsic");
+ assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
+          Opc == TargetOpcode::G_MEMSET) && "Expected a memcpy-like instruction");
auto MMOIt = MI.memoperands_begin();
const MachineMemOperand *MemOp = *MMOIt;
@@ -1316,11 +1456,11 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
Align DstAlign = MemOp->getBaseAlign();
Align SrcAlign;
- Register Dst = MI.getOperand(1).getReg();
- Register Src = MI.getOperand(2).getReg();
- Register Len = MI.getOperand(3).getReg();
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ Register Len = MI.getOperand(2).getReg();
- if (ID != Intrinsic::memset) {
+ if (Opc != TargetOpcode::G_MEMSET) {
assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
MemOp = *(++MMOIt);
SrcAlign = MemOp->getBaseAlign();
@@ -1330,7 +1470,7 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
if (!LenVRegAndVal)
return false; // Leave it to the legalizer to lower it to a libcall.
- unsigned KnownLen = LenVRegAndVal->Value;
+ unsigned KnownLen = LenVRegAndVal->Value.getZExtValue();
if (KnownLen == 0) {
MI.eraseFromParent();
@@ -1340,15 +1480,78 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
if (MaxLen && KnownLen > MaxLen)
return false;
- if (ID == Intrinsic::memcpy)
+ if (Opc == TargetOpcode::G_MEMCPY)
return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
- if (ID == Intrinsic::memmove)
+ if (Opc == TargetOpcode::G_MEMMOVE)
return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
- if (ID == Intrinsic::memset)
+ if (Opc == TargetOpcode::G_MEMSET)
return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
return false;
}
+static Optional<APFloat> constantFoldFpUnary(unsigned Opcode, LLT DstTy,
+ const Register Op,
+ const MachineRegisterInfo &MRI) {
+ const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI);
+ if (!MaybeCst)
+ return None;
+
+ APFloat V = MaybeCst->getValueAPF();
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Unexpected opcode!");
+ case TargetOpcode::G_FNEG: {
+ V.changeSign();
+ return V;
+ }
+ case TargetOpcode::G_FABS: {
+ V.clearSign();
+ return V;
+ }
+ case TargetOpcode::G_FPTRUNC:
+ break;
+ case TargetOpcode::G_FSQRT: {
+ bool Unused;
+ V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
+ V = APFloat(sqrt(V.convertToDouble()));
+ break;
+ }
+ case TargetOpcode::G_FLOG2: {
+ bool Unused;
+ V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
+ V = APFloat(log2(V.convertToDouble()));
+ break;
+ }
+ }
+ // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
+ // `buildFConstant` will assert on size mismatch. Only `G_FPTRUNC`, `G_FSQRT`,
+ // and `G_FLOG2` reach here.
+ bool Unused;
+ V.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, &Unused);
+ return V;
+}
+
+bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI,
+ Optional<APFloat> &Cst) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI);
+ return Cst.hasValue();
+}
+
+bool CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
+ Optional<APFloat> &Cst) {
+ assert(Cst.hasValue() && "Optional is unexpectedly empty!");
+ Builder.setInstrAndDebugLoc(MI);
+ MachineFunction &MF = Builder.getMF();
+ auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst);
+ Register DstReg = MI.getOperand(0).getReg();
+ Builder.buildFConstant(DstReg, *FPVal);
+ MI.eraseFromParent();
+ return true;
+}
+
bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
PtrAddChain &MatchInfo) {
// We're trying to match the following pattern:
@@ -1377,7 +1580,7 @@ bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
return false;
// Pass the combined immediate to the apply function.
- MatchInfo.Imm = MaybeImmVal->Value + MaybeImm2Val->Value;
+ MatchInfo.Imm = (MaybeImmVal->Value + MaybeImm2Val->Value).getSExtValue();
MatchInfo.Base = Base;
return true;
}
@@ -1395,15 +1598,211 @@ bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
return true;
}
+bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
+ RegisterImmPair &MatchInfo) {
+ // We're trying to match the following pattern with any of
+ // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
+ // %t1 = SHIFT %base, G_CONSTANT imm1
+ // %root = SHIFT %t1, G_CONSTANT imm2
+ // -->
+ // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
+
+ unsigned Opcode = MI.getOpcode();
+ assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
+ Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
+ Opcode == TargetOpcode::G_USHLSAT) &&
+ "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
+
+ Register Shl2 = MI.getOperand(1).getReg();
+ Register Imm1 = MI.getOperand(2).getReg();
+ auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
+ if (!MaybeImmVal)
+ return false;
+
+ MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
+ if (Shl2Def->getOpcode() != Opcode)
+ return false;
+
+ Register Base = Shl2Def->getOperand(1).getReg();
+ Register Imm2 = Shl2Def->getOperand(2).getReg();
+ auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
+ if (!MaybeImm2Val)
+ return false;
+
+ // Pass the combined immediate to the apply function.
+ MatchInfo.Imm =
+ (MaybeImmVal->Value.getSExtValue() + MaybeImm2Val->Value).getSExtValue();
+ MatchInfo.Reg = Base;
+
+ // There is no simple replacement for a saturating unsigned left shift that
+ // exceeds the scalar size.
+ if (Opcode == TargetOpcode::G_USHLSAT &&
+ MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
+ return false;
+
+ return true;
+}
+
+bool CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
+ RegisterImmPair &MatchInfo) {
+ unsigned Opcode = MI.getOpcode();
+ assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
+ Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
+ Opcode == TargetOpcode::G_USHLSAT) &&
+ "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
+
+ Builder.setInstrAndDebugLoc(MI);
+ LLT Ty = MRI.getType(MI.getOperand(1).getReg());
+ unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
+ auto Imm = MatchInfo.Imm;
+
+ if (Imm >= ScalarSizeInBits) {
+ // Any logical shift that exceeds scalar size will produce zero.
+ if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
+ Builder.buildConstant(MI.getOperand(0), 0);
+ MI.eraseFromParent();
+ return true;
+ }
+ // Arithmetic shift and saturating signed left shift have no effect beyond
+ // scalar size.
+ Imm = ScalarSizeInBits - 1;
+ }
+
+ LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
+ Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(MatchInfo.Reg);
+ MI.getOperand(2).setReg(NewImm);
+ Observer.changedInstr(MI);
+ return true;
+}
+
+bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
+ ShiftOfShiftedLogic &MatchInfo) {
+ // We're trying to match the following pattern with any of
+ // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
+ // with any of G_AND/G_OR/G_XOR logic instructions.
+ // %t1 = SHIFT %X, G_CONSTANT C0
+ // %t2 = LOGIC %t1, %Y
+ // %root = SHIFT %t2, G_CONSTANT C1
+ // -->
+ // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
+ // %t4 = SHIFT %Y, G_CONSTANT C1
+ // %root = LOGIC %t3, %t4
+ unsigned ShiftOpcode = MI.getOpcode();
+ assert((ShiftOpcode == TargetOpcode::G_SHL ||
+ ShiftOpcode == TargetOpcode::G_ASHR ||
+ ShiftOpcode == TargetOpcode::G_LSHR ||
+ ShiftOpcode == TargetOpcode::G_USHLSAT ||
+ ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
+ "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
+
+ // Match a one-use bitwise logic op.
+ Register LogicDest = MI.getOperand(1).getReg();
+ if (!MRI.hasOneNonDBGUse(LogicDest))
+ return false;
+
+ MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
+ unsigned LogicOpcode = LogicMI->getOpcode();
+ if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
+ LogicOpcode != TargetOpcode::G_XOR)
+ return false;
+
+ // Find a matching one-use shift by constant.
+ const Register C1 = MI.getOperand(2).getReg();
+ auto MaybeImmVal = getConstantVRegValWithLookThrough(C1, MRI);
+ if (!MaybeImmVal)
+ return false;
+
+ const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
+
+ auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
+    // Shift should match the previous one and have a single use.
+ if (MI->getOpcode() != ShiftOpcode ||
+ !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
+ return false;
+
+ // Must be a constant.
+ auto MaybeImmVal =
+ getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
+ if (!MaybeImmVal)
+ return false;
+
+ ShiftVal = MaybeImmVal->Value.getSExtValue();
+ return true;
+ };
+
+ // Logic ops are commutative, so check each operand for a match.
+ Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
+ MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
+ Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
+ MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
+ uint64_t C0Val;
+
+ if (matchFirstShift(LogicMIOp1, C0Val)) {
+ MatchInfo.LogicNonShiftReg = LogicMIReg2;
+ MatchInfo.Shift2 = LogicMIOp1;
+ } else if (matchFirstShift(LogicMIOp2, C0Val)) {
+ MatchInfo.LogicNonShiftReg = LogicMIReg1;
+ MatchInfo.Shift2 = LogicMIOp2;
+ } else
+ return false;
+
+ MatchInfo.ValSum = C0Val + C1Val;
+
+ // The fold is not valid if the sum of the shift values exceeds bitwidth.
+ if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
+ return false;
+
+ MatchInfo.Logic = LogicMI;
+ return true;
+}
+
+bool CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
+ ShiftOfShiftedLogic &MatchInfo) {
+ unsigned Opcode = MI.getOpcode();
+ assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
+ Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
+ Opcode == TargetOpcode::G_SSHLSAT) &&
+ "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
+
+ LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
+ LLT DestType = MRI.getType(MI.getOperand(0).getReg());
+ Builder.setInstrAndDebugLoc(MI);
+
+ Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
+
+ Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
+ Register Shift1 =
+ Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
+
+ Register Shift2Const = MI.getOperand(2).getReg();
+ Register Shift2 = Builder
+ .buildInstr(Opcode, {DestType},
+ {MatchInfo.LogicNonShiftReg, Shift2Const})
+ .getReg(0);
+
+ Register Dest = MI.getOperand(0).getReg();
+ Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
+
+  // These were single-use, so it's safe to remove them.
+ MatchInfo.Shift2->eraseFromParent();
+ MatchInfo.Logic->eraseFromParent();
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
unsigned &ShiftVal) {
assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
auto MaybeImmVal =
getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
- if (!MaybeImmVal || !isPowerOf2_64(MaybeImmVal->Value))
+ if (!MaybeImmVal)
return false;
- ShiftVal = Log2_64(MaybeImmVal->Value);
- return true;
+
+ ShiftVal = MaybeImmVal->Value.exactLogBase2();
+ return (static_cast<int32_t>(ShiftVal) != -1);
}
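+
+// For example (illustrative MIR): a multiply by an exact power of two such as
+//   %r:_(s32) = G_MUL %x, %c   ; %c = G_CONSTANT i32 8
+// becomes
+//   %r:_(s32) = G_SHL %x, %s   ; %s = G_CONSTANT i32 3
+// since exactLogBase2 returns 3 for 8 and -1 for any non-power-of-two.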
bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
@@ -1419,6 +1818,254 @@ bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
return true;
}
+// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
+bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
+ RegisterImmPair &MatchData) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);
+
+ Register LHS = MI.getOperand(1).getReg();
+
+ Register ExtSrc;
+ if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
+ !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
+ !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
+ return false;
+
+ // TODO: Should handle vector splat.
+ Register RHS = MI.getOperand(2).getReg();
+ auto MaybeShiftAmtVal = getConstantVRegValWithLookThrough(RHS, MRI);
+ if (!MaybeShiftAmtVal)
+ return false;
+
+ if (LI) {
+ LLT SrcTy = MRI.getType(ExtSrc);
+
+    // We only really care about the legality of the shifted value. We can
+    // pick any type for the constant shift amount, so ask the target what to
+    // use. Otherwise we would have to guess and hope it is reported as legal.
+ LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
+ return false;
+ }
+
+ int64_t ShiftAmt = MaybeShiftAmtVal->Value.getSExtValue();
+ MatchData.Reg = ExtSrc;
+ MatchData.Imm = ShiftAmt;
+
+ unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countLeadingOnes();
+ return MinLeadingZeros >= ShiftAmt;
+}
+
+bool CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
+ const RegisterImmPair &MatchData) {
+ Register ExtSrcReg = MatchData.Reg;
+ int64_t ShiftAmtVal = MatchData.Imm;
+
+ LLT ExtSrcTy = MRI.getType(ExtSrcReg);
+ Builder.setInstrAndDebugLoc(MI);
+ auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
+ auto NarrowShift =
+ Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
+ Builder.buildZExt(MI.getOperand(0), NarrowShift);
+ MI.eraseFromParent();
+ return true;
+}
+
+static Register peekThroughBitcast(Register Reg,
+ const MachineRegisterInfo &MRI) {
+ while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
+ ;
+
+ return Reg;
+}
+
+bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
+ MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+ "Expected an unmerge");
+ Register SrcReg =
+ peekThroughBitcast(MI.getOperand(MI.getNumOperands() - 1).getReg(), MRI);
+
+ MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
+ if (SrcInstr->getOpcode() != TargetOpcode::G_MERGE_VALUES &&
+ SrcInstr->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
+ SrcInstr->getOpcode() != TargetOpcode::G_CONCAT_VECTORS)
+ return false;
+
+ // Check the source type of the merge.
+ LLT SrcMergeTy = MRI.getType(SrcInstr->getOperand(1).getReg());
+ LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
+ bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
+ if (SrcMergeTy != Dst0Ty && !SameSize)
+ return false;
+ // They are the same now (modulo a bitcast).
+ // We can collect all the src registers.
+ for (unsigned Idx = 1, EndIdx = SrcInstr->getNumOperands(); Idx != EndIdx;
+ ++Idx)
+ Operands.push_back(SrcInstr->getOperand(Idx).getReg());
+ return true;
+}
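+
+// For example (illustrative MIR): unmerging a freshly merged value,
+//   %m:_(s64) = G_MERGE_VALUES %a:_(s32), %b:_(s32)
+//   %x:_(s32), %y:_(s32) = G_UNMERGE_VALUES %m
+// collects Operands = {%a, %b}, which the apply step below forwards to %x
+// and %y (or casts, when the merge went through a bitcast).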
+
+bool CombinerHelper::applyCombineUnmergeMergeToPlainValues(
+ MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+ "Expected an unmerge");
+ assert((MI.getNumOperands() - 1 == Operands.size()) &&
+ "Not enough operands to replace all defs");
+ unsigned NumElems = MI.getNumOperands() - 1;
+
+ LLT SrcTy = MRI.getType(Operands[0]);
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ bool CanReuseInputDirectly = DstTy == SrcTy;
+ Builder.setInstrAndDebugLoc(MI);
+ for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
+ Register DstReg = MI.getOperand(Idx).getReg();
+ Register SrcReg = Operands[Idx];
+ if (CanReuseInputDirectly)
+ replaceRegWith(MRI, DstReg, SrcReg);
+ else
+ Builder.buildCast(DstReg, SrcReg);
+ }
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI,
+ SmallVectorImpl<APInt> &Csts) {
+ unsigned SrcIdx = MI.getNumOperands() - 1;
+ Register SrcReg = MI.getOperand(SrcIdx).getReg();
+ MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
+ if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
+ SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
+ return false;
+  // Break down the big constant into smaller ones.
+ const MachineOperand &CstVal = SrcInstr->getOperand(1);
+ APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
+ ? CstVal.getCImm()->getValue()
+ : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
+
+ LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
+ unsigned ShiftAmt = Dst0Ty.getSizeInBits();
+ // Unmerge a constant.
+ for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
+ Csts.emplace_back(Val.trunc(ShiftAmt));
+ Val = Val.lshr(ShiftAmt);
+ }
+
+ return true;
+}
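+
+// For example (illustrative values): unmerging a 64-bit constant,
+//   %c:_(s64) = G_CONSTANT i64 0x00000001FFFFFFFF
+//   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %c
+// yields Csts = {0xFFFFFFFF, 0x1}, lowest piece first, matching the
+// trunc/lshr loop above.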
+
+bool CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
+ SmallVectorImpl<APInt> &Csts) {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+ "Expected an unmerge");
+ assert((MI.getNumOperands() - 1 == Csts.size()) &&
+ "Not enough operands to replace all defs");
+ unsigned NumElems = MI.getNumOperands() - 1;
+ Builder.setInstrAndDebugLoc(MI);
+ for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
+ Register DstReg = MI.getOperand(Idx).getReg();
+ Builder.buildConstant(DstReg, Csts[Idx]);
+ }
+
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+ "Expected an unmerge");
+ // Check that all the lanes are dead except the first one.
+ for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
+ if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
+ return false;
+ }
+ return true;
+}
+
+bool CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
+ Builder.setInstrAndDebugLoc(MI);
+ Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
+ // Truncating a vector is going to truncate every single lane,
+  // whereas we want the full low bits.
+ // Do the operation on a scalar instead.
+ LLT SrcTy = MRI.getType(SrcReg);
+ if (SrcTy.isVector())
+ SrcReg =
+ Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0);
+
+ Register Dst0Reg = MI.getOperand(0).getReg();
+ LLT Dst0Ty = MRI.getType(Dst0Reg);
+ if (Dst0Ty.isVector()) {
+ auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg);
+ Builder.buildCast(Dst0Reg, MIB);
+ } else
+ Builder.buildTrunc(Dst0Reg, SrcReg);
+ MI.eraseFromParent();
+ return true;
+}
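+
+// For example (illustrative MIR): when every result but the first is dead,
+//   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %x:_(s64)
+// becomes
+//   %lo:_(s32) = G_TRUNC %x:_(s64)
+// with the unused %hi definition dropped.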
+
+bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+ "Expected an unmerge");
+ Register Dst0Reg = MI.getOperand(0).getReg();
+ LLT Dst0Ty = MRI.getType(Dst0Reg);
+  // G_ZEXT on a vector applies to each lane, so it will
+ // affect all destinations. Therefore we won't be able
+ // to simplify the unmerge to just the first definition.
+ if (Dst0Ty.isVector())
+ return false;
+ Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ if (SrcTy.isVector())
+ return false;
+
+ Register ZExtSrcReg;
+ if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
+ return false;
+
+ // Finally we can replace the first definition with
+ // a zext of the source if the definition is big enough to hold
+  // all of ZExtSrc's bits.
+ LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
+ return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
+}
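+
+// For example (illustrative MIR): unmerging a zero-extended scalar,
+//   %z:_(s64) = G_ZEXT %s:_(s16)
+//   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %z
+// can become %lo = G_ZEXT %s with %hi replaced by the constant 0, because
+// everything above the low 16 bits is known to be zero.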
+
+bool CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+ "Expected an unmerge");
+
+ Register Dst0Reg = MI.getOperand(0).getReg();
+
+ MachineInstr *ZExtInstr =
+ MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
+ assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
+ "Expecting a G_ZEXT");
+
+ Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
+ LLT Dst0Ty = MRI.getType(Dst0Reg);
+ LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
+
+ Builder.setInstrAndDebugLoc(MI);
+
+ if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
+ Builder.buildZExt(Dst0Reg, ZExtSrcReg);
+ } else {
+ assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
+ "ZExt src doesn't fit in destination");
+ replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
+ }
+
+ Register ZeroReg;
+ for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
+ if (!ZeroReg)
+ ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
+ replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
+ }
+ MI.eraseFromParent();
+ return true;
+}
+
bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
unsigned TargetShiftSize,
unsigned &ShiftVal) {
@@ -1440,7 +2087,7 @@ bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
if (!MaybeImmVal)
return false;
- ShiftVal = MaybeImmVal->Value;
+ ShiftVal = MaybeImmVal->Value.getSExtValue();
return ShiftVal >= Size / 2 && ShiftVal < Size;
}
@@ -1529,6 +2176,296 @@ bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
return false;
}
+bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
+ assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ Register SrcReg = MI.getOperand(1).getReg();
+ return mi_match(SrcReg, MRI,
+ m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
+}
+
+bool CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
+ assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
+ Register DstReg = MI.getOperand(0).getReg();
+ Builder.setInstr(MI);
+ Builder.buildCopy(DstReg, Reg);
+ MI.eraseFromParent();
+ return true;
+}
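+
+// For example (illustrative MIR): a pointer round-tripped through an
+// integer of exactly the pointer width,
+//   %i:_(s64) = G_PTRTOINT %p:_(p0)
+//   %q:_(p0) = G_INTTOPTR %i
+// collapses to %q = COPY %p.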
+
+bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
+ assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
+ Register SrcReg = MI.getOperand(1).getReg();
+ return mi_match(SrcReg, MRI, m_GIntToPtr(m_Reg(Reg)));
+}
+
+bool CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
+ assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
+ Register DstReg = MI.getOperand(0).getReg();
+ Builder.setInstr(MI);
+ Builder.buildZExtOrTrunc(DstReg, Reg);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchCombineAddP2IToPtrAdd(
+ MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
+ assert(MI.getOpcode() == TargetOpcode::G_ADD);
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ LLT IntTy = MRI.getType(LHS);
+
+ // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
+ // instruction.
+ PtrReg.second = false;
+ for (Register SrcReg : {LHS, RHS}) {
+ if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
+ // Don't handle cases where the integer is implicitly converted to the
+ // pointer width.
+ LLT PtrTy = MRI.getType(PtrReg.first);
+ if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
+ return true;
+ }
+
+ PtrReg.second = true;
+ }
+
+ return false;
+}
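+
+// For example (illustrative MIR): integer arithmetic on a converted pointer,
+//   %i:_(s64) = G_PTRTOINT %p:_(p0)
+//   %r:_(s64) = G_ADD %i, %off
+// is rewritten by the apply step below into
+//   %a:_(p0) = G_PTR_ADD %p, %off
+//   %r:_(s64) = G_PTRTOINT %a
+// with PtrReg.second recording whether the G_ADD operands had to be swapped.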
+
+bool CombinerHelper::applyCombineAddP2IToPtrAdd(
+ MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+
+ const bool DoCommute = PtrReg.second;
+ if (DoCommute)
+ std::swap(LHS, RHS);
+ LHS = PtrReg.first;
+
+ LLT PtrTy = MRI.getType(LHS);
+
+ Builder.setInstrAndDebugLoc(MI);
+ auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
+ Builder.buildPtrToInt(Dst, PtrAdd);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
+ int64_t &NewCst) {
+ assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
+
+ if (auto RHSCst = getConstantVRegSExtVal(RHS, MRI)) {
+ int64_t Cst;
+ if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
+ NewCst = Cst + *RHSCst;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
+ int64_t &NewCst) {
+ assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
+ Register Dst = MI.getOperand(0).getReg();
+
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildConstant(Dst, NewCst);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
+ assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ return mi_match(SrcReg, MRI,
+ m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
+}
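+
+// For example (illustrative MIR): an anyext of a trunc back to the original
+// type,
+//   %t:_(s32) = G_TRUNC %x:_(s64)
+//   %y:_(s64) = G_ANYEXT %t
+// simply reuses %x for %y, since a G_ANYEXT leaves the high bits undefined
+// anyway.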
+
+bool CombinerHelper::applyCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
+ assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
+ Register DstReg = MI.getOperand(0).getReg();
+ MI.eraseFromParent();
+ replaceRegWith(MRI, DstReg, Reg);
+ return true;
+}
+
+bool CombinerHelper::matchCombineExtOfExt(
+ MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
+ assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
+ MI.getOpcode() == TargetOpcode::G_SEXT ||
+ MI.getOpcode() == TargetOpcode::G_ZEXT) &&
+ "Expected a G_[ASZ]EXT");
+ Register SrcReg = MI.getOperand(1).getReg();
+ MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
+ // Match exts with the same opcode, anyext([sz]ext) and sext(zext).
+ unsigned Opc = MI.getOpcode();
+ unsigned SrcOpc = SrcMI->getOpcode();
+ if (Opc == SrcOpc ||
+ (Opc == TargetOpcode::G_ANYEXT &&
+ (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) ||
+ (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) {
+ MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc);
+ return true;
+ }
+ return false;
+}
+
+bool CombinerHelper::applyCombineExtOfExt(
+ MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
+ assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
+ MI.getOpcode() == TargetOpcode::G_SEXT ||
+ MI.getOpcode() == TargetOpcode::G_ZEXT) &&
+ "Expected a G_[ASZ]EXT");
+
+ Register Reg = std::get<0>(MatchInfo);
+ unsigned SrcExtOp = std::get<1>(MatchInfo);
+
+ // Combine exts with the same opcode.
+ if (MI.getOpcode() == SrcExtOp) {
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(Reg);
+ Observer.changedInstr(MI);
+ return true;
+ }
+
+ // Combine:
+ // - anyext([sz]ext x) to [sz]ext x
+ // - sext(zext x) to zext x
+ if (MI.getOpcode() == TargetOpcode::G_ANYEXT ||
+ (MI.getOpcode() == TargetOpcode::G_SEXT &&
+ SrcExtOp == TargetOpcode::G_ZEXT)) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildInstr(SrcExtOp, {DstReg}, {Reg});
+ MI.eraseFromParent();
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildSub(DstReg, Builder.buildConstant(DstTy, 0), SrcReg,
+ MI.getFlags());
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg) {
+ assert(MI.getOpcode() == TargetOpcode::G_FNEG && "Expected a G_FNEG");
+ Register SrcReg = MI.getOperand(1).getReg();
+ return mi_match(SrcReg, MRI, m_GFNeg(m_Reg(Reg)));
+}
+
+bool CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {
+ assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
+ Src = MI.getOperand(1).getReg();
+ Register AbsSrc;
+ return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc)));
+}
+
+bool CombinerHelper::applyCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {
+ assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
+ Register Dst = MI.getOperand(0).getReg();
+ MI.eraseFromParent();
+ replaceRegWith(MRI, Dst, Src);
+ return true;
+}
+
+bool CombinerHelper::matchCombineTruncOfExt(
+ MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
+ Register SrcReg = MI.getOperand(1).getReg();
+ MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
+ unsigned SrcOpc = SrcMI->getOpcode();
+ if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT ||
+ SrcOpc == TargetOpcode::G_ZEXT) {
+ MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc);
+ return true;
+ }
+ return false;
+}
+
+bool CombinerHelper::applyCombineTruncOfExt(
+ MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
+ Register SrcReg = MatchInfo.first;
+ unsigned SrcExtOp = MatchInfo.second;
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ LLT DstTy = MRI.getType(DstReg);
+ if (SrcTy == DstTy) {
+ MI.eraseFromParent();
+ replaceRegWith(MRI, DstReg, SrcReg);
+ return true;
+ }
+ Builder.setInstrAndDebugLoc(MI);
+ if (SrcTy.getSizeInBits() < DstTy.getSizeInBits())
+ Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg});
+ else
+ Builder.buildTrunc(DstReg, SrcReg);
+ MI.eraseFromParent();
+ return true;
+}
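+
+// For example (illustrative MIR): truncating an extended value,
+//   %e:_(s64) = G_ZEXT %x:_(s16)
+//   %t:_(s32) = G_TRUNC %e
+// becomes %t = G_ZEXT %x; if %t were s16 the combine would reuse %x
+// directly, and if %t were s8 it would emit a G_TRUNC of %x instead.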
+
+bool CombinerHelper::matchCombineTruncOfShl(
+ MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ Register ShiftSrc;
+ Register ShiftAmt;
+
+ if (MRI.hasOneNonDBGUse(SrcReg) &&
+ mi_match(SrcReg, MRI, m_GShl(m_Reg(ShiftSrc), m_Reg(ShiftAmt))) &&
+ isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_SHL,
+ {DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}})) {
+ KnownBits Known = KB->getKnownBits(ShiftAmt);
+ unsigned Size = DstTy.getSizeInBits();
+ if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
+ MatchInfo = std::make_pair(ShiftSrc, ShiftAmt);
+ return true;
+ }
+ }
+ return false;
+}
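+
+// For example (illustrative MIR): a shift whose only user is a truncate,
+//   %s:_(s64) = G_SHL %x:_(s64), %amt
+//   %t:_(s32) = G_TRUNC %s
+// can shift in the narrow type instead,
+//   %nx:_(s32) = G_TRUNC %x
+//   %t:_(s32) = G_SHL %nx, %amt
+// provided known bits prove %amt is below 32, so nothing escapes the
+// narrow width.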
+
+bool CombinerHelper::applyCombineTruncOfShl(
+ MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
+
+ Register ShiftSrc = MatchInfo.first;
+ Register ShiftAmt = MatchInfo.second;
+ Builder.setInstrAndDebugLoc(MI);
+ auto TruncShiftSrc = Builder.buildTrunc(DstTy, ShiftSrc);
+ Builder.buildShl(DstReg, TruncShiftSrc, ShiftAmt, SrcMI->getFlags());
+ MI.eraseFromParent();
+ return true;
+}
+
bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
return MO.isReg() &&
@@ -1555,6 +2492,22 @@ bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
MRI);
}
+bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_SELECT);
+ return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
+ MRI);
+}
+
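+// Illustrative sketch of the constant-condition select fold below (names and
+// types assumed):
+//   %cond:_(s1) = G_CONSTANT i1 1
+//   %res:_(s32) = G_SELECT %cond, %a, %b
+// reduces to a direct use of %a (operand 2); a zero condition selects %b
+// (operand 3) instead.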
+bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
+ assert(MI.getOpcode() == TargetOpcode::G_SELECT);
+ if (auto MaybeCstCmp =
+ getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI)) {
+ OpIdx = MaybeCstCmp->Value.isNullValue() ? 3 : 2;
+ return true;
+ }
+ return false;
+}
+
bool CombinerHelper::eraseInst(MachineInstr &MI) {
MI.eraseFromParent();
return true;
@@ -1651,6 +2604,16 @@ bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
return true;
}
+bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
+ Register Replacement) {
+ assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
+ Register OldReg = MI.getOperand(0).getReg();
+ assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
+ MI.eraseFromParent();
+ replaceRegWith(MRI, OldReg, Replacement);
+ return true;
+}
+
bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_SELECT);
// Match (cond ? x : x)
@@ -1671,6 +2634,18 @@ bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
MRI);
}
+bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ return MO.isReg() &&
+ getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
+}
+
+bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
+ unsigned OpIdx) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
+}
+
bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
assert(MI.getNumDefs() == 1 && "Expected only one def?");
Builder.setInstr(MI);
@@ -1706,9 +2681,7 @@ bool CombinerHelper::matchSimplifyAddToSub(
// ((0-A) + B) -> B - A
// (A + (0-B)) -> A - B
auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
- int64_t Cst;
- if (!mi_match(MaybeSub, MRI, m_GSub(m_ICst(Cst), m_Reg(NewRHS))) ||
- Cst != 0)
+ if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
return false;
NewLHS = MaybeNewLHS;
return true;
@@ -1717,6 +2690,67 @@ bool CombinerHelper::matchSimplifyAddToSub(
return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
}
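+// Illustrative sketch of the insert_vector_elt chain fold below (names and
+// types assumed; the constant index operands are elided):
+//   %undef:_(<2 x s32>) = G_IMPLICIT_DEF
+//   %v1:_(<2 x s32>) = G_INSERT_VECTOR_ELT %undef, %a, 0
+//   %v2:_(<2 x s32>) = G_INSERT_VECTOR_ELT %v1, %b, 1
+// collapses to
+//   %v2:_(<2 x s32>) = G_BUILD_VECTOR %a, %b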
+bool CombinerHelper::matchCombineInsertVecElts(
+ MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
+ "Invalid opcode");
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
+ unsigned NumElts = DstTy.getNumElements();
+ // If this MI is part of a sequence of insert_vec_elts, then
+ // don't do the combine in the middle of the sequence.
+ if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
+ TargetOpcode::G_INSERT_VECTOR_ELT)
+ return false;
+ MachineInstr *CurrInst = &MI;
+ MachineInstr *TmpInst;
+ int64_t IntImm;
+ Register TmpReg;
+ MatchInfo.resize(NumElts);
+ while (mi_match(
+ CurrInst->getOperand(0).getReg(), MRI,
+ m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
+ if (IntImm >= NumElts)
+ return false;
+ if (!MatchInfo[IntImm])
+ MatchInfo[IntImm] = TmpReg;
+ CurrInst = TmpInst;
+ }
+ // Variable index.
+ if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
+ return false;
+ if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
+ for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
+ if (!MatchInfo[I - 1].isValid())
+ MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
+ }
+ return true;
+ }
+ // If we didn't end in a G_IMPLICIT_DEF, bail out.
+ return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF;
+}
+
+bool CombinerHelper::applyCombineInsertVecElts(
+ MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
+ Builder.setInstr(MI);
+ Register UndefReg;
+ auto GetUndef = [&]() {
+ if (UndefReg)
+ return UndefReg;
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
+ return UndefReg;
+ };
+ for (unsigned I = 0; I < MatchInfo.size(); ++I) {
+ if (!MatchInfo[I])
+ MatchInfo[I] = GetUndef();
+ }
+ Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
+ MI.eraseFromParent();
+ return true;
+}
+
bool CombinerHelper::applySimplifyAddToSub(
MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
Builder.setInstr(MI);
@@ -1727,6 +2761,812 @@ bool CombinerHelper::applySimplifyAddToSub(
return true;
}
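+// Illustrative sketch of the hoisting fold below (types assumed):
+//   %xext:_(s64) = G_ZEXT %x(s32)
+//   %yext:_(s64) = G_ZEXT %y(s32)
+//   %res:_(s64) = G_AND %xext, %yext
+// becomes
+//   %and:_(s32) = G_AND %x, %y
+//   %res:_(s64) = G_ZEXT %and(s32)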
+bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
+ MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
+ // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
+ //
+ // Creates the new hand and logic instructions (but does not insert them).
+ //
+ // On success, MatchInfo is populated with the new instructions. These are
+ // inserted in applyHoistLogicOpWithSameOpcodeHands.
+ unsigned LogicOpcode = MI.getOpcode();
+ assert(LogicOpcode == TargetOpcode::G_AND ||
+ LogicOpcode == TargetOpcode::G_OR ||
+ LogicOpcode == TargetOpcode::G_XOR);
+ MachineIRBuilder MIB(MI);
+ Register Dst = MI.getOperand(0).getReg();
+ Register LHSReg = MI.getOperand(1).getReg();
+ Register RHSReg = MI.getOperand(2).getReg();
+
+ // Don't recompute anything.
+ if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
+ return false;
+
+ // Make sure we have (hand x, ...), (hand y, ...)
+ MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
+ MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
+ if (!LeftHandInst || !RightHandInst)
+ return false;
+ unsigned HandOpcode = LeftHandInst->getOpcode();
+ if (HandOpcode != RightHandInst->getOpcode())
+ return false;
+ if (!LeftHandInst->getOperand(1).isReg() ||
+ !RightHandInst->getOperand(1).isReg())
+ return false;
+
+ // Make sure the types match up, and if we're doing this post-legalization,
+ // we end up with legal types.
+ Register X = LeftHandInst->getOperand(1).getReg();
+ Register Y = RightHandInst->getOperand(1).getReg();
+ LLT XTy = MRI.getType(X);
+ LLT YTy = MRI.getType(Y);
+ if (XTy != YTy)
+ return false;
+ if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
+ return false;
+
+ // Optional extra source register.
+ Register ExtraHandOpSrcReg;
+ switch (HandOpcode) {
+ default:
+ return false;
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_ZEXT: {
+ // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
+ break;
+ }
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_SHL: {
+ // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
+ MachineOperand &ZOp = LeftHandInst->getOperand(2);
+ if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
+ return false;
+ ExtraHandOpSrcReg = ZOp.getReg();
+ break;
+ }
+ }
+
+ // Record the steps to build the new instructions.
+ //
+ // Steps to build (logic x, y)
+ auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
+ OperandBuildSteps LogicBuildSteps = {
+ [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
+ InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
+
+ // Steps to build hand (logic x, y), ...z
+ OperandBuildSteps HandBuildSteps = {
+ [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
+ if (ExtraHandOpSrcReg.isValid())
+ HandBuildSteps.push_back(
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
+ InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
+
+ MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
+ return true;
+}
+
+bool CombinerHelper::applyBuildInstructionSteps(
+ MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
+ assert(MatchInfo.InstrsToBuild.size() &&
+ "Expected at least one instr to build?");
+ Builder.setInstr(MI);
+ for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
+ assert(InstrToBuild.Opcode && "Expected a valid opcode?");
+ assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
+ MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
+ for (auto &OperandFn : InstrToBuild.OperandFns)
+ OperandFn(Instr);
+ }
+ MI.eraseFromParent();
+ return true;
+}
+
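+// Illustrative sketch of the ashr(shl x, k), k fold below (the s32 type and
+// shift amount of 24 are assumed):
+//   %shl:_(s32) = G_SHL %x, 24
+//   %res:_(s32) = G_ASHR %shl, 24
+// becomes
+//   %res:_(s32) = G_SEXT_INREG %x, 8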
+bool CombinerHelper::matchAshrShlToSextInreg(
+ MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_ASHR);
+ int64_t ShlCst, AshrCst;
+ Register Src;
+ // FIXME: detect splat constant vectors.
+ if (!mi_match(MI.getOperand(0).getReg(), MRI,
+ m_GAShr(m_GShl(m_Reg(Src), m_ICst(ShlCst)), m_ICst(AshrCst))))
+ return false;
+ if (ShlCst != AshrCst)
+ return false;
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
+ return false;
+ MatchInfo = std::make_tuple(Src, ShlCst);
+ return true;
+}
+bool CombinerHelper::applyAshShlToSextInreg(
+ MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_ASHR);
+ Register Src;
+ int64_t ShiftAmt;
+ std::tie(Src, ShiftAmt) = MatchInfo;
+ unsigned Size = MRI.getType(Src).getScalarSizeInBits();
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
+ Register &Replacement) {
+ // Given
+ //
+ // %y:_(sN) = G_SOMETHING
+ // %x:_(sN) = G_SOMETHING
+ // %res:_(sN) = G_AND %x, %y
+ //
+ // Eliminate the G_AND when it is known that x & y == x or x & y == y.
+ //
+ // Patterns like this can appear as a result of legalization. E.g.
+ //
+ // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
+ // %one:_(s32) = G_CONSTANT i32 1
+ // %and:_(s32) = G_AND %cmp, %one
+ //
+ // In this case, G_ICMP only produces a single bit, so x & 1 == x.
+ assert(MI.getOpcode() == TargetOpcode::G_AND);
+ if (!KB)
+ return false;
+
+ Register AndDst = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(AndDst);
+
+ // FIXME: This should be removed once GISelKnownBits supports vectors.
+ if (DstTy.isVector())
+ return false;
+
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ KnownBits LHSBits = KB->getKnownBits(LHS);
+ KnownBits RHSBits = KB->getKnownBits(RHS);
+
+ // Check that x & Mask == x.
+ // x & 1 == x, always
+ // x & 0 == x, only if x is also 0
+ // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
+ //
+ // Check if we can replace AndDst with the LHS of the G_AND
+ if (canReplaceReg(AndDst, LHS, MRI) &&
+ (LHSBits.Zero | RHSBits.One).isAllOnesValue()) {
+ Replacement = LHS;
+ return true;
+ }
+
+ // Check if we can replace AndDst with the RHS of the G_AND
+ if (canReplaceReg(AndDst, RHS, MRI) &&
+ (LHSBits.One | RHSBits.Zero).isAllOnesValue()) {
+ Replacement = RHS;
+ return true;
+ }
+
+ return false;
+}
+
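+// As with matchRedundantAnd above, an illustrative case (types assumed):
+//   %zero:_(s32) = G_CONSTANT i32 0
+//   %or:_(s32) = G_OR %x, %zero
+// Every bit of the RHS is known zero, so %or can be replaced by %x.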
+bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) {
+ // Given
+ //
+ // %y:_(sN) = G_SOMETHING
+ // %x:_(sN) = G_SOMETHING
+ // %res:_(sN) = G_OR %x, %y
+ //
+ // Eliminate the G_OR when it is known that x | y == x or x | y == y.
+ assert(MI.getOpcode() == TargetOpcode::G_OR);
+ if (!KB)
+ return false;
+
+ Register OrDst = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(OrDst);
+
+ // FIXME: This should be removed once GISelKnownBits supports vectors.
+ if (DstTy.isVector())
+ return false;
+
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ KnownBits LHSBits = KB->getKnownBits(LHS);
+ KnownBits RHSBits = KB->getKnownBits(RHS);
+
+ // Check that x | Mask == x.
+ // x | 0 == x, always
+ // x | 1 == x, only if x is also 1
+ // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
+ //
+ // Check if we can replace OrDst with the LHS of the G_OR
+ if (canReplaceReg(OrDst, LHS, MRI) &&
+ (LHSBits.One | RHSBits.Zero).isAllOnesValue()) {
+ Replacement = LHS;
+ return true;
+ }
+
+ // Check if we can replace OrDst with the RHS of the G_OR
+ if (canReplaceReg(OrDst, RHS, MRI) &&
+ (LHSBits.Zero | RHSBits.One).isAllOnesValue()) {
+ Replacement = RHS;
+ return true;
+ }
+
+ return false;
+}
+
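+// Illustrative sketch of the redundant G_SEXT_INREG below (types assumed):
+//   %x:_(s32) = G_SEXT %a(s8)
+//   %res:_(s32) = G_SEXT_INREG %x, 16
+// %x already has 25 known sign bits, more than the 32 - 16 + 1 = 17 required,
+// so the G_SEXT_INREG is a no-op and %res can be replaced by %x.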
+bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) {
+ // If the input is already sign extended, just drop the extension.
+ Register Src = MI.getOperand(1).getReg();
+ unsigned ExtBits = MI.getOperand(2).getImm();
+ unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
+ return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
+}
+
+static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
+ int64_t Cst, bool IsVector, bool IsFP) {
+ // For i1, Cst will always be -1 regardless of boolean contents.
+ return (ScalarSizeBits == 1 && Cst == -1) ||
+ isConstTrueVal(TLI, Cst, IsVector, IsFP);
+}
+
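+// Illustrative sketch of the not(cmp) fold below (s1 types assumed; the
+// "true" constant depends on the target's boolean contents):
+//   %c:_(s1) = G_ICMP intpred(eq), %x, %y
+//   %t:_(s1) = G_CONSTANT i1 1
+//   %n:_(s1) = G_XOR %c, %t
+// becomes
+//   %n:_(s1) = G_ICMP intpred(ne), %x, %y
+// and De Morgan's laws extend the rewrite through G_AND/G_OR trees of
+// comparisons.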
+bool CombinerHelper::matchNotCmp(MachineInstr &MI,
+ SmallVectorImpl<Register> &RegsToNegate) {
+ assert(MI.getOpcode() == TargetOpcode::G_XOR);
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
+ Register XorSrc;
+ Register CstReg;
+ // We match xor(src, true) here.
+ if (!mi_match(MI.getOperand(0).getReg(), MRI,
+ m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
+ return false;
+
+ if (!MRI.hasOneNonDBGUse(XorSrc))
+ return false;
+
+ // Check that XorSrc is the root of a tree of comparisons combined with ANDs
+ // and ORs. The suffix of RegsToNegate starting from index I is used as a
+ // work list of tree nodes to visit.
+ RegsToNegate.push_back(XorSrc);
+ // Remember whether the comparisons are all integer or all floating point.
+ bool IsInt = false;
+ bool IsFP = false;
+ for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
+ Register Reg = RegsToNegate[I];
+ if (!MRI.hasOneNonDBGUse(Reg))
+ return false;
+ MachineInstr *Def = MRI.getVRegDef(Reg);
+ switch (Def->getOpcode()) {
+ default:
+ // Don't match if the tree contains anything other than ANDs, ORs and
+ // comparisons.
+ return false;
+ case TargetOpcode::G_ICMP:
+ if (IsFP)
+ return false;
+ IsInt = true;
+ // When we apply the combine we will invert the predicate.
+ break;
+ case TargetOpcode::G_FCMP:
+ if (IsInt)
+ return false;
+ IsFP = true;
+ // When we apply the combine we will invert the predicate.
+ break;
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_OR:
+ // Implement De Morgan's laws:
+ // ~(x & y) -> ~x | ~y
+ // ~(x | y) -> ~x & ~y
+ // When we apply the combine we will change the opcode and recursively
+ // negate the operands.
+ RegsToNegate.push_back(Def->getOperand(1).getReg());
+ RegsToNegate.push_back(Def->getOperand(2).getReg());
+ break;
+ }
+ }
+
+ // Now we know whether the comparisons are integer or floating point, check
+ // the constant in the xor.
+ int64_t Cst;
+ if (Ty.isVector()) {
+ MachineInstr *CstDef = MRI.getVRegDef(CstReg);
+ auto MaybeCst = getBuildVectorConstantSplat(*CstDef, MRI);
+ if (!MaybeCst)
+ return false;
+ if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
+ return false;
+ } else {
+ if (!mi_match(CstReg, MRI, m_ICst(Cst)))
+ return false;
+ if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
+ return false;
+ }
+
+ return true;
+}
+
+bool CombinerHelper::applyNotCmp(MachineInstr &MI,
+ SmallVectorImpl<Register> &RegsToNegate) {
+ for (Register Reg : RegsToNegate) {
+ MachineInstr *Def = MRI.getVRegDef(Reg);
+ Observer.changingInstr(*Def);
+ // For each comparison, invert the opcode. For each AND and OR, change the
+ // opcode.
+ switch (Def->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode");
+ case TargetOpcode::G_ICMP:
+ case TargetOpcode::G_FCMP: {
+ MachineOperand &PredOp = Def->getOperand(1);
+ CmpInst::Predicate NewP = CmpInst::getInversePredicate(
+ (CmpInst::Predicate)PredOp.getPredicate());
+ PredOp.setPredicate(NewP);
+ break;
+ }
+ case TargetOpcode::G_AND:
+ Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
+ break;
+ case TargetOpcode::G_OR:
+ Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
+ break;
+ }
+ Observer.changedInstr(*Def);
+ }
+
+ replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
+ MI.eraseFromParent();
+ return true;
+}
+
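+// Illustrative sketch of the xor(and x, y), y fold below (types assumed):
+//   %and:_(s32) = G_AND %x, %y
+//   %res:_(s32) = G_XOR %and, %y
+// becomes
+//   %not:_(s32) = G_XOR %x, -1
+//   %res:_(s32) = G_AND %not, %y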
+bool CombinerHelper::matchXorOfAndWithSameReg(
+ MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+ // Match (xor (and x, y), y) (or any of its commuted cases)
+ assert(MI.getOpcode() == TargetOpcode::G_XOR);
+ Register &X = MatchInfo.first;
+ Register &Y = MatchInfo.second;
+ Register AndReg = MI.getOperand(1).getReg();
+ Register SharedReg = MI.getOperand(2).getReg();
+
+ // Find a G_AND on either side of the G_XOR.
+ // Look for one of
+ //
+ // (xor (and x, y), SharedReg)
+ // (xor SharedReg, (and x, y))
+ if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
+ std::swap(AndReg, SharedReg);
+ if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
+ return false;
+ }
+
+ // Only do this if we'll eliminate the G_AND.
+ if (!MRI.hasOneNonDBGUse(AndReg))
+ return false;
+
+ // We can combine if SharedReg is the same as either the LHS or RHS of the
+ // G_AND.
+ if (Y != SharedReg)
+ std::swap(X, Y);
+ return Y == SharedReg;
+}
+
+bool CombinerHelper::applyXorOfAndWithSameReg(
+ MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+ // Fold (xor (and x, y), y) -> (and (not x), y)
+ Builder.setInstrAndDebugLoc(MI);
+ Register X, Y;
+ std::tie(X, Y) = MatchInfo;
+ auto Not = Builder.buildNot(MRI.getType(X), X);
+ Observer.changingInstr(MI);
+ MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
+ MI.getOperand(1).setReg(Not->getOperand(0).getReg());
+ MI.getOperand(2).setReg(Y);
+ Observer.changedInstr(MI);
+ return true;
+}
+
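+// Illustrative sketch of the ptr_add-of-null fold below (the p0 and s64 types
+// are assumed):
+//   %null:_(p0) = G_CONSTANT i64 0
+//   %ptr:_(p0) = G_PTR_ADD %null, %off(s64)
+// becomes
+//   %ptr:_(p0) = G_INTTOPTR %off(s64)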
+bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) {
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(DstReg);
+ const DataLayout &DL = Builder.getMF().getDataLayout();
+
+ if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
+ return false;
+
+ if (Ty.isPointer()) {
+ auto ConstVal = getConstantVRegVal(MI.getOperand(1).getReg(), MRI);
+ return ConstVal && *ConstVal == 0;
+ }
+
+ assert(Ty.isVector() && "Expecting a vector type");
+ const MachineInstr *VecMI = MRI.getVRegDef(MI.getOperand(1).getReg());
+ return isBuildVectorAllZeros(*VecMI, MRI);
+}
+
+bool CombinerHelper::applyPtrAddZero(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD);
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildIntToPtr(MI.getOperand(0), MI.getOperand(2));
+ MI.eraseFromParent();
+ return true;
+}
+
+/// The second source operand is known to be a power of 2.
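+/// For example (illustrative, with a power-of-two %pow2):
+///   %res:_(s32) = G_UREM %x, %pow2
+/// becomes
+///   %mask:_(s32) = G_ADD %pow2, -1
+///   %res:_(s32) = G_AND %x, %mask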
+bool CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register Src0 = MI.getOperand(1).getReg();
+ Register Pow2Src1 = MI.getOperand(2).getReg();
+ LLT Ty = MRI.getType(DstReg);
+ Builder.setInstrAndDebugLoc(MI);
+
+ // Fold (urem x, pow2) -> (and x, pow2-1)
+ auto NegOne = Builder.buildConstant(Ty, -1);
+ auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
+ Builder.buildAnd(DstReg, Src0, Add);
+ MI.eraseFromParent();
+ return true;
+}
+
+Optional<SmallVector<Register, 8>>
+CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
+ assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
+ // We want to detect if Root is part of a tree which represents a bunch
+ // of loads being merged into a larger load. We'll try to recognize patterns
+ // like, for example:
+ //
+ // Reg Reg
+ // \ /
+ // OR_1 Reg
+ // \ /
+ // OR_2
+ // \ Reg
+ // .. /
+ // Root
+ //
+ // Reg Reg Reg Reg
+ // \ / \ /
+ // OR_1 OR_2
+ // \ /
+ // \ /
+ // ...
+ // Root
+ //
+ // Each "Reg" may have been produced by a load + some arithmetic. This
+ // function will save each of them.
+ SmallVector<Register, 8> RegsToVisit;
+ SmallVector<const MachineInstr *, 7> Ors = {Root};
+
+ // In the "worst" case, we're dealing with a load for each byte. So, there
+ // are at most #bytes - 1 ORs.
+ const unsigned MaxIter =
+ MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
+ for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
+ if (Ors.empty())
+ break;
+ const MachineInstr *Curr = Ors.pop_back_val();
+ Register OrLHS = Curr->getOperand(1).getReg();
+ Register OrRHS = Curr->getOperand(2).getReg();
+
+ // In the combine, we want to eliminate the entire tree.
+ if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
+ return None;
+
+ // If it's a G_OR, save it and continue to walk. If it's not, then it's
+ // something that may be a load + arithmetic.
+ if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
+ Ors.push_back(Or);
+ else
+ RegsToVisit.push_back(OrLHS);
+ if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
+ Ors.push_back(Or);
+ else
+ RegsToVisit.push_back(OrRHS);
+ }
+
+ // We're going to try and merge each register into a wider power-of-2 type,
+ // so we ought to have an even number of registers.
+ if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
+ return None;
+ return RegsToVisit;
+}
+
+/// Helper function for findLoadOffsetsForLoadOrCombine.
+///
+/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
+/// and then moving that value into a specific byte offset.
+///
+/// e.g. x[i] << 24
+///
+/// \returns The load instruction and the byte offset it is moved into.
+static Optional<std::pair<MachineInstr *, int64_t>>
+matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
+ const MachineRegisterInfo &MRI) {
+ assert(MRI.hasOneNonDBGUse(Reg) &&
+ "Expected Reg to only have one non-debug use?");
+ Register MaybeLoad;
+ int64_t Shift;
+ if (!mi_match(Reg, MRI,
+ m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
+ Shift = 0;
+ MaybeLoad = Reg;
+ }
+
+ if (Shift % MemSizeInBits != 0)
+ return None;
+
+ // TODO: Handle other types of loads.
+ auto *Load = getOpcodeDef(TargetOpcode::G_ZEXTLOAD, MaybeLoad, MRI);
+ if (!Load)
+ return None;
+
+ const auto &MMO = **Load->memoperands_begin();
+ if (!MMO.isUnordered() || MMO.getSizeInBits() != MemSizeInBits)
+ return None;
+
+ return std::make_pair(Load, Shift / MemSizeInBits);
+}
+
+Optional<std::pair<MachineInstr *, int64_t>>
+CombinerHelper::findLoadOffsetsForLoadOrCombine(
+ SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
+ const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
+
+ // Each load found for the pattern. There should be one for each RegsToVisit.
+ SmallSetVector<const MachineInstr *, 8> Loads;
+
+ // The lowest index used in any load. (The lowest "i" for each x[i].)
+ int64_t LowestIdx = INT64_MAX;
+
+ // The load which uses the lowest index.
+ MachineInstr *LowestIdxLoad = nullptr;
+
+ // Keeps track of the load indices we see. We shouldn't see any indices twice.
+ SmallSet<int64_t, 8> SeenIdx;
+
+ // Ensure each load is in the same MBB.
+ // TODO: Support multiple MachineBasicBlocks.
+ MachineBasicBlock *MBB = nullptr;
+ const MachineMemOperand *MMO = nullptr;
+
+ // Earliest instruction-order load in the pattern.
+ MachineInstr *EarliestLoad = nullptr;
+
+ // Latest instruction-order load in the pattern.
+ MachineInstr *LatestLoad = nullptr;
+
+ // Base pointer which every load should share.
+ Register BasePtr;
+
+ // We want to find a load for each register. Each load should have some
+ // appropriate bit twiddling arithmetic. During this loop, we will also keep
+ // track of the load which uses the lowest index. Later, we will check if we
+ // can use its pointer in the final, combined load.
+ for (auto Reg : RegsToVisit) {
+ // Find the load, and the position that it will end up at in the (e.g.
+ // shifted) value.
+ auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
+ if (!LoadAndPos)
+ return None;
+ MachineInstr *Load;
+ int64_t DstPos;
+ std::tie(Load, DstPos) = *LoadAndPos;
+
+ // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
+ // it is difficult to check for stores/calls/etc between loads.
+ MachineBasicBlock *LoadMBB = Load->getParent();
+ if (!MBB)
+ MBB = LoadMBB;
+ if (LoadMBB != MBB)
+ return None;
+
+ // Make sure that the MachineMemOperands of every seen load are compatible.
+ const MachineMemOperand *LoadMMO = *Load->memoperands_begin();
+ if (!MMO)
+ MMO = LoadMMO;
+ if (MMO->getAddrSpace() != LoadMMO->getAddrSpace())
+ return None;
+
+ // Find out what the base pointer and index for the load is.
+ Register LoadPtr;
+ int64_t Idx;
+ if (!mi_match(Load->getOperand(1).getReg(), MRI,
+ m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
+ LoadPtr = Load->getOperand(1).getReg();
+ Idx = 0;
+ }
+
+ // Don't combine things like a[i], a[i] -> a bigger load.
+ if (!SeenIdx.insert(Idx).second)
+ return None;
+
+ // Every load must share the same base pointer; don't combine things like:
+ //
+ // a[i], b[i + 1] -> a bigger load.
+ if (!BasePtr.isValid())
+ BasePtr = LoadPtr;
+ if (BasePtr != LoadPtr)
+ return None;
+
+ if (Idx < LowestIdx) {
+ LowestIdx = Idx;
+ LowestIdxLoad = Load;
+ }
+
+ // Keep track of the byte offset that this load ends up at. If we have seen
+ // the byte offset, then stop here. We do not want to combine:
+ //
+ // a[i] << 16, a[i + k] << 16 -> a bigger load.
+ if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
+ return None;
+ Loads.insert(Load);
+
+ // Keep track of the position of the earliest/latest loads in the pattern.
+ // We will check that there are no load fold barriers between them later
+ // on.
+ //
+ // FIXME: Is there a better way to check for load fold barriers?
+ if (!EarliestLoad || dominates(*Load, *EarliestLoad))
+ EarliestLoad = Load;
+ if (!LatestLoad || dominates(*LatestLoad, *Load))
+ LatestLoad = Load;
+ }
+
+ // We found a load for each register. Let's check if each load satisfies the
+ // pattern.
+ assert(Loads.size() == RegsToVisit.size() &&
+ "Expected to find a load for each register?");
+ assert(EarliestLoad != LatestLoad && EarliestLoad &&
+ LatestLoad && "Expected at least two loads?");
+
+ // Check if there are any stores, calls, etc. between any of the loads. If
+ // there are, then we can't safely perform the combine.
+ //
+ // MaxIter is chosen based on the (worst-case) number of iterations it
+ // typically takes to succeed in the LLVM test suite, plus some padding.
+ //
+ // FIXME: Is there a better way to check for load fold barriers?
+ const unsigned MaxIter = 20;
+ unsigned Iter = 0;
+ for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
+ LatestLoad->getIterator())) {
+ if (Loads.count(&MI))
+ continue;
+ if (MI.isLoadFoldBarrier())
+ return None;
+ if (Iter++ == MaxIter)
+ return None;
+ }
+
+ return std::make_pair(LowestIdxLoad, LowestIdx);
+}
+
+bool CombinerHelper::matchLoadOrCombine(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_OR);
+ MachineFunction &MF = *MI.getMF();
+ // Assuming a little-endian target, transform:
+ // s8 *a = ...
+ // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
+ // =>
+ // s32 val = *((i32)a)
+ //
+ // s8 *a = ...
+ // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
+ // =>
+ // s32 val = BSWAP(*((s32)a))
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ if (Ty.isVector())
+ return false;
+
+ // We need to combine at least two loads into this type. Since the smallest
+ // possible load is into a byte, we need at least a 16-bit wide type.
+ const unsigned WideMemSizeInBits = Ty.getSizeInBits();
+ if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
+ return false;
+
+ // Match a collection of non-OR instructions in the pattern.
+ auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
+ if (!RegsToVisit)
+ return false;
+
+ // We have a collection of non-OR instructions. Figure out how wide each of
+ // the small loads should be based on the number of potential loads we
+ // found.
+ const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
+ if (NarrowMemSizeInBits % 8 != 0)
+ return false;
+
+ // Check if each register feeding into each OR is a load from the same
+ // base pointer + some arithmetic.
+ //
+ // e.g. a[0], a[1] << 8, a[2] << 16, etc.
+ //
+ // Also verify that each of these ends up putting a[i] into the same memory
+ // offset as a load into a wide type would.
+ SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
+ MachineInstr *LowestIdxLoad;
+ int64_t LowestIdx;
+ auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
+ MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
+ if (!MaybeLoadInfo)
+ return false;
+ std::tie(LowestIdxLoad, LowestIdx) = *MaybeLoadInfo;
+
+ // We have a bunch of loads being OR'd together. Using the addresses + offsets
+ // we found before, check if this corresponds to a big or little endian byte
+ // pattern. If it does, then we can represent it using a load + possibly a
+ // BSWAP.
+ bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
+ Optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
+ if (!IsBigEndian.hasValue())
+ return false;
+ bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
+ if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
+ return false;
+
+ // Make sure that the load from the lowest index produces offset 0 in the
+ // final value.
+ //
+ // This ensures that we won't combine something like this:
+ //
+ // load x[i] -> byte 2
+ // load x[i+1] -> byte 0 ---> wide_load x[i]
+ // load x[i+2] -> byte 1
+ const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
+ const unsigned ZeroByteOffset =
+ *IsBigEndian
+ ? bigEndianByteAt(NumLoadsInTy, 0)
+ : littleEndianByteAt(NumLoadsInTy, 0);
+ auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
+ if (ZeroOffsetIdx == MemOffset2Idx.end() ||
+ ZeroOffsetIdx->second != LowestIdx)
+ return false;
+
+ // We will reuse the pointer from the load which ends up at byte offset 0. It
+ // may not use index 0.
+ Register Ptr = LowestIdxLoad->getOperand(1).getReg();
+ const MachineMemOperand &MMO = **LowestIdxLoad->memoperands_begin();
+ LegalityQuery::MemDesc MMDesc;
+ MMDesc.SizeInBits = WideMemSizeInBits;
+ MMDesc.AlignInBits = MMO.getAlign().value() * 8;
+ MMDesc.Ordering = MMO.getOrdering();
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
+ return false;
+ auto PtrInfo = MMO.getPointerInfo();
+ auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
+
+ // Load must be allowed and fast on the target.
+ LLVMContext &C = MF.getFunction().getContext();
+ auto &DL = MF.getDataLayout();
+ bool Fast = false;
+ if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
+ !Fast)
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &MIB) {
+ Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
+ MIB.buildLoad(LoadDst, Ptr, *NewMMO);
+ if (NeedsBSwap)
+ MIB.buildBSwap(Dst, LoadDst);
+ };
+ return true;
+}
+
+bool CombinerHelper::applyLoadOrCombine(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ Builder.setInstrAndDebugLoc(MI);
+ MatchInfo(Builder);
+ MI.eraseFromParent();
+ return true;
+}
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp
index bdaa6378e901..59f4d60a41d8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp
@@ -16,7 +16,7 @@
using namespace llvm;
void GISelChangeObserver::changingAllUsesOfReg(
- const MachineRegisterInfo &MRI, unsigned Reg) {
+ const MachineRegisterInfo &MRI, Register Reg) {
for (auto &ChangingMI : MRI.use_instructions(Reg)) {
changingInstr(ChangingMI);
ChangingAllUsesOfReg.insert(&ChangingMI);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
index 0e9c6e4fab9f..2de20489e1d1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -95,6 +94,25 @@ dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) {
<< "\n";
}
+/// Compute known bits for the intersection of \p Src0 and \p Src1
+void GISelKnownBits::computeKnownBitsMin(Register Src0, Register Src1,
+ KnownBits &Known,
+ const APInt &DemandedElts,
+ unsigned Depth) {
+ // Test src1 first, since we canonicalize simpler expressions to the RHS.
+ computeKnownBitsImpl(Src1, Known, DemandedElts, Depth);
+
+ // If we don't know any bits, early out.
+ if (Known.isUnknown())
+ return;
+
+ KnownBits Known2;
+ computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth);
+
+ // Only known if known in both the LHS and RHS.
+ Known = KnownBits::commonBits(Known, Known2);
+}
+
void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
const APInt &DemandedElts,
unsigned Depth) {
@@ -182,8 +200,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
// For COPYs we don't do anything, don't increase the depth.
computeKnownBitsImpl(SrcReg, Known2, DemandedElts,
Depth + (Opcode != TargetOpcode::COPY));
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
// If we reach a point where we don't know anything
// just stop looking through the operands.
if (Known.One == 0 && Known.Zero == 0)
@@ -200,8 +217,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
auto CstVal = getConstantVRegVal(R, MRI);
if (!CstVal)
break;
- Known.One = *CstVal;
- Known.Zero = ~Known.One;
+ Known = KnownBits::makeConstant(*CstVal);
break;
}
case TargetOpcode::G_FRAME_INDEX: {
@@ -268,33 +284,50 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
Depth + 1);
computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
Depth + 1);
- // If low bits are zero in either operand, output low known-0 bits.
- // Also compute a conservative estimate for high known-0 bits.
- // More trickiness is possible, but this is sufficient for the
- // interesting case of alignment computation.
- unsigned TrailZ =
- Known.countMinTrailingZeros() + Known2.countMinTrailingZeros();
- unsigned LeadZ =
- std::max(Known.countMinLeadingZeros() + Known2.countMinLeadingZeros(),
- BitWidth) -
- BitWidth;
-
- Known.resetAll();
- Known.Zero.setLowBits(std::min(TrailZ, BitWidth));
- Known.Zero.setHighBits(std::min(LeadZ, BitWidth));
+ Known = KnownBits::computeForMul(Known, Known2);
break;
}
case TargetOpcode::G_SELECT: {
- computeKnownBitsImpl(MI.getOperand(3).getReg(), Known, DemandedElts,
+ computeKnownBitsMin(MI.getOperand(2).getReg(), MI.getOperand(3).getReg(),
+ Known, DemandedElts, Depth + 1);
+ break;
+ }
+ case TargetOpcode::G_SMIN: {
+ // TODO: Handle clamp pattern with number of sign bits
+ KnownBits KnownRHS;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
Depth + 1);
- // If we don't know any bits, early out.
- if (Known.isUnknown())
- break;
- computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
Depth + 1);
- // Only known if known in both the LHS and RHS.
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::smin(Known, KnownRHS);
+ break;
+ }
+ case TargetOpcode::G_SMAX: {
+ // TODO: Handle clamp pattern with number of sign bits
+ KnownBits KnownRHS;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
+ Depth + 1);
+ Known = KnownBits::smax(Known, KnownRHS);
+ break;
+ }
+ case TargetOpcode::G_UMIN: {
+ KnownBits KnownRHS;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known,
+ DemandedElts, Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS,
+ DemandedElts, Depth + 1);
+ Known = KnownBits::umin(Known, KnownRHS);
+ break;
+ }
+ case TargetOpcode::G_UMAX: {
+ KnownBits KnownRHS;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known,
+ DemandedElts, Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS,
+ DemandedElts, Depth + 1);
+ Known = KnownBits::umax(Known, KnownRHS);
break;
}
case TargetOpcode::G_FCMP:
@@ -314,61 +347,56 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
Known = Known.sext(BitWidth);
break;
}
+ case TargetOpcode::G_SEXT_INREG: {
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
+ Depth + 1);
+ Known = Known.sextInReg(MI.getOperand(2).getImm());
+ break;
+ }
case TargetOpcode::G_ANYEXT: {
computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
Depth + 1);
- Known = Known.zext(BitWidth);
+ Known = Known.anyext(BitWidth);
break;
}
case TargetOpcode::G_LOAD: {
- if (MI.hasOneMemOperand()) {
- const MachineMemOperand *MMO = *MI.memoperands_begin();
- if (const MDNode *Ranges = MMO->getRanges()) {
- computeKnownBitsFromRangeMetadata(*Ranges, Known);
- }
+ const MachineMemOperand *MMO = *MI.memoperands_begin();
+ if (const MDNode *Ranges = MMO->getRanges()) {
+ computeKnownBitsFromRangeMetadata(*Ranges, Known);
}
+
break;
}
case TargetOpcode::G_ZEXTLOAD: {
// Everything above the retrieved bits is zero
- if (MI.hasOneMemOperand())
- Known.Zero.setBitsFrom((*MI.memoperands_begin())->getSizeInBits());
+ Known.Zero.setBitsFrom((*MI.memoperands_begin())->getSizeInBits());
break;
}
- case TargetOpcode::G_ASHR:
- case TargetOpcode::G_LSHR:
- case TargetOpcode::G_SHL: {
- KnownBits RHSKnown;
+ case TargetOpcode::G_ASHR: {
+ KnownBits LHSKnown, RHSKnown;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
+ Depth + 1);
computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
Depth + 1);
- if (!RHSKnown.isConstant()) {
- LLVM_DEBUG(
- MachineInstr *RHSMI = MRI.getVRegDef(MI.getOperand(2).getReg());
- dbgs() << '[' << Depth << "] Shift not known constant: " << *RHSMI);
- break;
- }
- uint64_t Shift = RHSKnown.getConstant().getZExtValue();
- LLVM_DEBUG(dbgs() << '[' << Depth << "] Shift is " << Shift << '\n');
-
- computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
+ Known = KnownBits::ashr(LHSKnown, RHSKnown);
+ break;
+ }
+ case TargetOpcode::G_LSHR: {
+ KnownBits LHSKnown, RHSKnown;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
Depth + 1);
-
- switch (Opcode) {
- case TargetOpcode::G_ASHR:
- Known.Zero = Known.Zero.ashr(Shift);
- Known.One = Known.One.ashr(Shift);
- break;
- case TargetOpcode::G_LSHR:
- Known.Zero = Known.Zero.lshr(Shift);
- Known.One = Known.One.lshr(Shift);
- Known.Zero.setBitsFrom(Known.Zero.getBitWidth() - Shift);
- break;
- case TargetOpcode::G_SHL:
- Known.Zero = Known.Zero.shl(Shift);
- Known.One = Known.One.shl(Shift);
- Known.Zero.setBits(0, Shift);
- break;
- }
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
+ Depth + 1);
+ Known = KnownBits::lshr(LHSKnown, RHSKnown);
+ break;
+ }
+ case TargetOpcode::G_SHL: {
+ KnownBits LHSKnown, RHSKnown;
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
+ Depth + 1);
+ Known = KnownBits::shl(LHSKnown, RHSKnown);
break;
}
case TargetOpcode::G_INTTOPTR:
@@ -390,6 +418,48 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
Known.Zero.setBitsFrom(SrcBitWidth);
break;
}
+ case TargetOpcode::G_MERGE_VALUES: {
+ unsigned NumOps = MI.getNumOperands();
+ unsigned OpSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+
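+ // Each narrow source fills a disjoint OpSize-bit slice of the result;
+ // e.g. (illustrative) merging two s32 sources into an s64 puts operand 1
+ // at bits [0, 32) and operand 2 at bits [32, 64).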
+ for (unsigned I = 0; I != NumOps - 1; ++I) {
+ KnownBits SrcOpKnown;
+ computeKnownBitsImpl(MI.getOperand(I + 1).getReg(), SrcOpKnown,
+ DemandedElts, Depth + 1);
+ Known.insertBits(SrcOpKnown, I * OpSize);
+ }
+ break;
+ }
+ case TargetOpcode::G_UNMERGE_VALUES: {
+ unsigned NumOps = MI.getNumOperands();
+ Register SrcReg = MI.getOperand(NumOps - 1).getReg();
+ if (MRI.getType(SrcReg).isVector())
+ return; // TODO: Handle vectors.
+
+ KnownBits SrcOpKnown;
+ computeKnownBitsImpl(SrcReg, SrcOpKnown, DemandedElts, Depth + 1);
+
+ // Figure out the result operand index
+ unsigned DstIdx = 0;
+ for (; DstIdx != NumOps - 1 && MI.getOperand(DstIdx).getReg() != R;
+ ++DstIdx)
+ ;
+
+ Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx);
+ break;
+ }
+ case TargetOpcode::G_BSWAP: {
+ Register SrcReg = MI.getOperand(1).getReg();
+ computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
+ Known.byteSwap();
+ break;
+ }
+ case TargetOpcode::G_BITREVERSE: {
+ Register SrcReg = MI.getOperand(1).getReg();
+ computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
+ Known.reverseBits();
+ break;
+ }
}
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
@@ -399,6 +469,17 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
ComputeKnownBitsCache[R] = Known;
}
+/// Compute number of sign bits for the intersection of \p Src0 and \p Src1
+unsigned GISelKnownBits::computeNumSignBitsMin(Register Src0, Register Src1,
+ const APInt &DemandedElts,
+ unsigned Depth) {
+ // Test src1 first, since we canonicalize simpler expressions to the RHS.
+ unsigned Src1SignBits = computeNumSignBits(Src1, DemandedElts, Depth);
+ if (Src1SignBits == 1)
+ return 1;
+ return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits);
+}
+
unsigned GISelKnownBits::computeNumSignBits(Register R,
const APInt &DemandedElts,
unsigned Depth) {
@@ -442,15 +523,30 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp;
}
+ case TargetOpcode::G_SEXT_INREG: {
+ // Max of the input and what this extends.
+ Register Src = MI.getOperand(1).getReg();
+ unsigned SrcBits = MI.getOperand(2).getImm();
+ unsigned InRegBits = TyBits - SrcBits + 1;
+ return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1), InRegBits);
+ }
case TargetOpcode::G_SEXTLOAD: {
- Register Dst = MI.getOperand(0).getReg();
- LLT Ty = MRI.getType(Dst);
- // TODO: add vector support
- if (Ty.isVector())
- break;
- if (MI.hasOneMemOperand())
- return Ty.getSizeInBits() - (*MI.memoperands_begin())->getSizeInBits();
- break;
+ // FIXME: We need an in-memory type representation.
+ if (DstTy.isVector())
+ return 1;
+
+ // e.g. i16->i32 = '17' bits known.
+ const MachineMemOperand *MMO = *MI.memoperands_begin();
+ return TyBits - MMO->getSizeInBits() + 1;
+ }
+ case TargetOpcode::G_ZEXTLOAD: {
+ // FIXME: We need an in-memory type representation.
+ if (DstTy.isVector())
+ return 1;
+
+ // e.g. i16->i32 = '16' bits known.
+ const MachineMemOperand *MMO = *MI.memoperands_begin();
+ return TyBits - MMO->getSizeInBits();
}
case TargetOpcode::G_TRUNC: {
Register Src = MI.getOperand(1).getReg();
@@ -464,6 +560,11 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
return NumSrcSignBits - (NumSrcBits - DstTyBits);
break;
}
+ case TargetOpcode::G_SELECT: {
+ return computeNumSignBitsMin(MI.getOperand(2).getReg(),
+ MI.getOperand(3).getReg(), DemandedElts,
+ Depth + 1);
+ }
case TargetOpcode::G_INTRINSIC:
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
default: {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 8f6643b2f193..b97c369b832d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -29,9 +29,11 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/SwitchLoweringUtils.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -48,11 +50,13 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
@@ -70,6 +74,7 @@
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
+#include <cstddef>
#include <cstdint>
#include <iterator>
#include <string>
@@ -90,6 +95,8 @@ INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(StackProtector)
INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
false, false)
@@ -110,7 +117,8 @@ static void reportTranslationError(MachineFunction &MF,
ORE.emit(R);
}
-IRTranslator::IRTranslator() : MachineFunctionPass(ID) { }
+IRTranslator::IRTranslator(CodeGenOpt::Level optlevel)
+ : MachineFunctionPass(ID), OptLevel(optlevel) {}
#ifndef NDEBUG
namespace {
@@ -154,13 +162,17 @@ void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<StackProtector>();
AU.addRequired<TargetPassConfig>();
AU.addRequired<GISelCSEAnalysisWrapperPass>();
+ if (OptLevel != CodeGenOpt::None)
+ AU.addRequired<BranchProbabilityInfoWrapperPass>();
getSelectionDAGFallbackAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
IRTranslator::ValueToVRegInfo::VRegListT &
IRTranslator::allocateVRegs(const Value &Val) {
- assert(!VMap.contains(Val) && "Value already allocated in VMap");
+ auto VRegsIt = VMap.findVRegs(Val);
+ if (VRegsIt != VMap.vregs_end())
+ return *VRegsIt->second;
auto *Regs = VMap.getVRegs(Val);
auto *Offsets = VMap.getOffsets(Val);
SmallVector<LLT, 4> SplitTys;
@@ -222,8 +234,9 @@ ArrayRef<Register> IRTranslator::getOrCreateVRegs(const Value &Val) {
}
int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) {
- if (FrameIndices.find(&AI) != FrameIndices.end())
- return FrameIndices[&AI];
+ auto MapEntry = FrameIndices.find(&AI);
+ if (MapEntry != FrameIndices.end())
+ return MapEntry->second;
uint64_t ElementSize = DL->getTypeAllocSize(AI.getAllocatedType());
uint64_t Size =
@@ -293,25 +306,8 @@ bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
return true;
}
-bool IRTranslator::translateFSub(const User &U, MachineIRBuilder &MIRBuilder) {
- // -0.0 - X --> G_FNEG
- if (isa<Constant>(U.getOperand(0)) &&
- U.getOperand(0) == ConstantFP::getZeroValueForNegation(U.getType())) {
- Register Op1 = getOrCreateVReg(*U.getOperand(1));
- Register Res = getOrCreateVReg(U);
- uint16_t Flags = 0;
- if (isa<Instruction>(U)) {
- const Instruction &I = cast<Instruction>(U);
- Flags = MachineInstr::copyFlagsFromInstruction(I);
- }
- // Negate the last operand of the FSUB
- MIRBuilder.buildFNeg(Res, Op1, Flags);
- return true;
- }
- return translateBinaryOp(TargetOpcode::G_FSUB, U, MIRBuilder);
-}
-
-bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
+bool IRTranslator::translateUnaryOp(unsigned Opcode, const User &U,
+ MachineIRBuilder &MIRBuilder) {
Register Op0 = getOrCreateVReg(*U.getOperand(0));
Register Res = getOrCreateVReg(U);
uint16_t Flags = 0;
@@ -319,10 +315,14 @@ bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
const Instruction &I = cast<Instruction>(U);
Flags = MachineInstr::copyFlagsFromInstruction(I);
}
- MIRBuilder.buildFNeg(Res, Op0, Flags);
+ MIRBuilder.buildInstr(Opcode, {Res}, {Op0}, Flags);
return true;
}
+bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateUnaryOp(TargetOpcode::G_FNEG, U, MIRBuilder);
+}
+
bool IRTranslator::translateCompare(const User &U,
MachineIRBuilder &MIRBuilder) {
auto *CI = dyn_cast<CmpInst>(&U);
@@ -368,31 +368,289 @@ bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
// The target may mess up with the insertion point, but
// this is not important as a return is the last instruction
// of the block anyway.
- return CLI->lowerReturn(MIRBuilder, Ret, VRegs, SwiftErrorVReg);
+ return CLI->lowerReturn(MIRBuilder, Ret, VRegs, FuncInfo, SwiftErrorVReg);
+}
+
+void IRTranslator::emitBranchForMergedCondition(
+ const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB,
+ BranchProbability TProb, BranchProbability FProb, bool InvertCond) {
+ // If the leaf of the tree is a comparison, merge the condition into
+ // the caseblock.
+ if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
+ CmpInst::Predicate Condition;
+ if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
+ Condition = InvertCond ? IC->getInversePredicate() : IC->getPredicate();
+ } else {
+ const FCmpInst *FC = cast<FCmpInst>(Cond);
+ Condition = InvertCond ? FC->getInversePredicate() : FC->getPredicate();
+ }
+
+ SwitchCG::CaseBlock CB(Condition, false, BOp->getOperand(0),
+ BOp->getOperand(1), nullptr, TBB, FBB, CurBB,
+ CurBuilder->getDebugLoc(), TProb, FProb);
+ SL->SwitchCases.push_back(CB);
+ return;
+ }
+
+ // Create a CaseBlock record representing this branch.
+ CmpInst::Predicate Pred = InvertCond ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
+ SwitchCG::CaseBlock CB(
+ Pred, false, Cond, ConstantInt::getTrue(MF->getFunction().getContext()),
+ nullptr, TBB, FBB, CurBB, CurBuilder->getDebugLoc(), TProb, FProb);
+ SL->SwitchCases.push_back(CB);
+}
+
+static bool isValInBlock(const Value *V, const BasicBlock *BB) {
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ return I->getParent() == BB;
+ return true;
+}
+
+void IRTranslator::findMergedConditions(
+ const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB,
+ Instruction::BinaryOps Opc, BranchProbability TProb,
+ BranchProbability FProb, bool InvertCond) {
+ using namespace PatternMatch;
+ assert((Opc == Instruction::And || Opc == Instruction::Or) &&
+ "Expected Opc to be AND/OR");
+ // Skip over a NOT node that is not part of the tree, and remember to invert
+ // the op and operands at the next level.
+ Value *NotCond;
+ if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) &&
+ isValInBlock(NotCond, CurBB->getBasicBlock())) {
+ findMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
+ !InvertCond);
+ return;
+ }
+
+ const Instruction *BOp = dyn_cast<Instruction>(Cond);
+ const Value *BOpOp0, *BOpOp1;
+ // Compute the effective opcode for Cond, taking into account whether it needs
+ // to be inverted, e.g.
+ // and (not (or A, B)), C
+ // gets lowered as
+ // and (and (not A, not B), C)
+ Instruction::BinaryOps BOpc = (Instruction::BinaryOps)0;
+ if (BOp) {
+ BOpc = match(BOp, m_LogicalAnd(m_Value(BOpOp0), m_Value(BOpOp1)))
+ ? Instruction::And
+ : (match(BOp, m_LogicalOr(m_Value(BOpOp0), m_Value(BOpOp1)))
+ ? Instruction::Or
+ : (Instruction::BinaryOps)0);
+ if (InvertCond) {
+ if (BOpc == Instruction::And)
+ BOpc = Instruction::Or;
+ else if (BOpc == Instruction::Or)
+ BOpc = Instruction::And;
+ }
+ }
+
+ // If this node is not part of the or/and tree, emit it as a branch.
+ // Note that all nodes in the tree should have the same opcode.
+ bool BOpIsInOrAndTree = BOpc && BOpc == Opc && BOp->hasOneUse();
+ if (!BOpIsInOrAndTree || BOp->getParent() != CurBB->getBasicBlock() ||
+ !isValInBlock(BOpOp0, CurBB->getBasicBlock()) ||
+ !isValInBlock(BOpOp1, CurBB->getBasicBlock())) {
+ emitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, TProb, FProb,
+ InvertCond);
+ return;
+ }
+
+ // Create TmpBB after CurBB.
+ MachineFunction::iterator BBI(CurBB);
+ MachineBasicBlock *TmpBB =
+ MF->CreateMachineBasicBlock(CurBB->getBasicBlock());
+ CurBB->getParent()->insert(++BBI, TmpBB);
+
+ if (Opc == Instruction::Or) {
+ // Codegen X | Y as:
+ // BB1:
+ // jmp_if_X TBB
+ // jmp TmpBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+
+ // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+ // The requirement is that
+ // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
+ // = TrueProb for original BB.
+ // Assuming the original probabilities are A and B, one choice is to set
+ // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
+ // A/(1+B) and 2B/(1+B). This choice assumes that
+ // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
+ // Another choice is to assume TrueProb for BB1 equals to TrueProb for
+ // TmpBB, but the math is more complicated.
+
+ auto NewTrueProb = TProb / 2;
+ auto NewFalseProb = TProb / 2 + FProb;
+ // Emit the LHS condition.
+ findMergedConditions(BOpOp0, TBB, TmpBB, CurBB, SwitchBB, Opc, NewTrueProb,
+ NewFalseProb, InvertCond);
+
+ // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
+ SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
+ BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
+ // Emit the RHS condition into TmpBB.
+ findMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
+ Probs[1], InvertCond);
+ } else {
+ assert(Opc == Instruction::And && "Unknown merge op!");
+ // Codegen X & Y as:
+ // BB1:
+ // jmp_if_X TmpBB
+ // jmp FBB
+ // TmpBB:
+ // jmp_if_Y TBB
+ // jmp FBB
+ //
+ // This requires creation of TmpBB after CurBB.
+
+ // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+ // The requirement is that
+ // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
+ // = FalseProb for original BB.
+ // Assuming the original probabilities are A and B, one choice is to set
+ // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
+ // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
+ // TrueProb for BB1 * FalseProb for TmpBB.
+
+ auto NewTrueProb = TProb + FProb / 2;
+ auto NewFalseProb = FProb / 2;
+ // Emit the LHS condition.
+ findMergedConditions(BOpOp0, TmpBB, FBB, CurBB, SwitchBB, Opc, NewTrueProb,
+ NewFalseProb, InvertCond);
+
+ // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
+ SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
+ BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
+ // Emit the RHS condition into TmpBB.
+ findMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
+ Probs[1], InvertCond);
+ }
+}
+
+bool IRTranslator::shouldEmitAsBranches(
+ const std::vector<SwitchCG::CaseBlock> &Cases) {
+ // For multiple cases, it's better to emit as branches.
+ if (Cases.size() != 2)
+ return true;
+
+ // If this is two comparisons of the same values or'd or and'd together, they
+ // will get folded into a single comparison, so don't emit two blocks.
+ if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
+ Cases[0].CmpRHS == Cases[1].CmpRHS) ||
+ (Cases[0].CmpRHS == Cases[1].CmpLHS &&
+ Cases[0].CmpLHS == Cases[1].CmpRHS)) {
+ return false;
+ }
+
+ // Handle: (X != null) | (Y != null) --> (X|Y) != 0
+ // Handle: (X == null) & (Y == null) --> (X|Y) == 0
+ if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
+ Cases[0].PredInfo.Pred == Cases[1].PredInfo.Pred &&
+ isa<Constant>(Cases[0].CmpRHS) &&
+ cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
+ if (Cases[0].PredInfo.Pred == CmpInst::ICMP_EQ &&
+ Cases[0].TrueBB == Cases[1].ThisBB)
+ return false;
+ if (Cases[0].PredInfo.Pred == CmpInst::ICMP_NE &&
+ Cases[0].FalseBB == Cases[1].ThisBB)
+ return false;
+ }
+
+ return true;
}
bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
const BranchInst &BrInst = cast<BranchInst>(U);
- unsigned Succ = 0;
- if (!BrInst.isUnconditional()) {
- // We want a G_BRCOND to the true BB followed by an unconditional branch.
- Register Tst = getOrCreateVReg(*BrInst.getCondition());
- const BasicBlock &TrueTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ++));
- MachineBasicBlock &TrueBB = getMBB(TrueTgt);
- MIRBuilder.buildBrCond(Tst, TrueBB);
+ auto &CurMBB = MIRBuilder.getMBB();
+ auto *Succ0MBB = &getMBB(*BrInst.getSuccessor(0));
+
+ if (BrInst.isUnconditional()) {
+ // If the unconditional target is the layout successor, fallthrough.
+ if (!CurMBB.isLayoutSuccessor(Succ0MBB))
+ MIRBuilder.buildBr(*Succ0MBB);
+
+ // Link successors.
+ for (const BasicBlock *Succ : successors(&BrInst))
+ CurMBB.addSuccessor(&getMBB(*Succ));
+ return true;
}
- const BasicBlock &BrTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ));
- MachineBasicBlock &TgtBB = getMBB(BrTgt);
- MachineBasicBlock &CurBB = MIRBuilder.getMBB();
+ // If this condition is one of the special cases we handle, lower it
+ // specially now.
+ const Value *CondVal = BrInst.getCondition();
+ MachineBasicBlock *Succ1MBB = &getMBB(*BrInst.getSuccessor(1));
- // If the unconditional target is the layout successor, fallthrough.
- if (!CurBB.isLayoutSuccessor(&TgtBB))
- MIRBuilder.buildBr(TgtBB);
+ const auto &TLI = *MF->getSubtarget().getTargetLowering();
- // Link successors.
- for (const BasicBlock *Succ : successors(&BrInst))
- CurBB.addSuccessor(&getMBB(*Succ));
+ // If this is a series of conditions that are or'd or and'd together, emit
+ // this as a sequence of branches instead of setcc's with and/or operations.
+ // As long as jumps are not expensive (exceptions for multi-use logic ops,
+ // unpredictable branches, and vector extracts because those jumps are likely
+ // expensive for any target), this should improve performance.
+ // For example, instead of something like:
+ // cmp A, B
+ // C = seteq
+ // cmp D, E
+ // F = setle
+ // or C, F
+ // jnz foo
+ // Emit:
+ // cmp A, B
+ // je foo
+ // cmp D, E
+ // jle foo
+ using namespace PatternMatch;
+ const Instruction *CondI = dyn_cast<Instruction>(CondVal);
+ if (!TLI.isJumpExpensive() && CondI && CondI->hasOneUse() &&
+ !BrInst.hasMetadata(LLVMContext::MD_unpredictable)) {
+ Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0;
+ Value *Vec;
+ const Value *BOp0, *BOp1;
+ if (match(CondI, m_LogicalAnd(m_Value(BOp0), m_Value(BOp1))))
+ Opcode = Instruction::And;
+ else if (match(CondI, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
+ Opcode = Instruction::Or;
+
+ if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
+ match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
+ findMergedConditions(CondI, Succ0MBB, Succ1MBB, &CurMBB, &CurMBB, Opcode,
+ getEdgeProbability(&CurMBB, Succ0MBB),
+ getEdgeProbability(&CurMBB, Succ1MBB),
+ /*InvertCond=*/false);
+ assert(SL->SwitchCases[0].ThisBB == &CurMBB && "Unexpected lowering!");
+
+ // Allow some cases to be rejected.
+ if (shouldEmitAsBranches(SL->SwitchCases)) {
+ // Emit the branch for this block.
+ emitSwitchCase(SL->SwitchCases[0], &CurMBB, *CurBuilder);
+ SL->SwitchCases.erase(SL->SwitchCases.begin());
+ return true;
+ }
+
+ // Okay, we decided not to do this, remove any inserted MBB's and clear
+ // SwitchCases.
+ for (unsigned I = 1, E = SL->SwitchCases.size(); I != E; ++I)
+ MF->erase(SL->SwitchCases[I].ThisBB);
+
+ SL->SwitchCases.clear();
+ }
+ }
+
+ // Create a CaseBlock record representing this branch.
+ SwitchCG::CaseBlock CB(CmpInst::ICMP_EQ, false, CondVal,
+ ConstantInt::getTrue(MF->getFunction().getContext()),
+ nullptr, Succ0MBB, Succ1MBB, &CurMBB,
+ CurBuilder->getDebugLoc());
+
+ // Use emitSwitchCase to actually insert the fast branch sequence for this
+ // cond branch.
+ emitSwitchCase(CB, &CurMBB, *CurBuilder);
return true;
}
@@ -457,6 +715,7 @@ bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) {
}
SL->findJumpTables(Clusters, &SI, DefaultMBB, nullptr, nullptr);
+ SL->findBitTestClusters(Clusters, &SI);
LLVM_DEBUG({
dbgs() << "Case clusters: ";
@@ -577,8 +836,23 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
const LLT i1Ty = LLT::scalar(1);
// Build the compare.
if (!CB.CmpMHS) {
- Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
- Cond = MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
+ const auto *CI = dyn_cast<ConstantInt>(CB.CmpRHS);
+ // For conditional branch lowering, we might try to do something silly like
+ // emit a G_ICMP to compare an existing G_ICMP i1 result with true. If so,
+ // just re-use the existing condition vreg.
+ if (CI && CI->getZExtValue() == 1 &&
+ MRI->getType(CondLHS).getSizeInBits() == 1 &&
+ CB.PredInfo.Pred == CmpInst::ICMP_EQ) {
+ Cond = CondLHS;
+ } else {
+ Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
+ if (CmpInst::isFPPredicate(CB.PredInfo.Pred))
+ Cond =
+ MIB.buildFCmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
+ else
+ Cond =
+ MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
+ }
} else {
assert(CB.PredInfo.Pred == CmpInst::ICMP_SLE &&
"Can only handle SLE ranges");
@@ -611,17 +885,8 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
addSuccessorWithProb(CB.ThisBB, CB.FalseBB, CB.FalseProb);
CB.ThisBB->normalizeSuccProbs();
- // if (SwitchBB->getBasicBlock() != CB.FalseBB->getBasicBlock())
- addMachineCFGPred({SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()},
- CB.ThisBB);
-
- // If the lhs block is the next block, invert the condition so that we can
- // fall through to the lhs instead of the rhs block.
- if (CB.TrueBB == CB.ThisBB->getNextNode()) {
- std::swap(CB.TrueBB, CB.FalseBB);
- auto True = MIB.buildConstant(i1Ty, 1);
- Cond = MIB.buildXor(i1Ty, Cond, True).getReg(0);
- }
+ addMachineCFGPred({SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()},
+ CB.ThisBB);
MIB.buildBrCond(Cond, *CB.TrueBB);
MIB.buildBr(*CB.FalseBB);
@@ -734,6 +999,156 @@ bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I,
return true;
}
+void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B,
+ MachineBasicBlock *SwitchBB) {
+ MachineIRBuilder &MIB = *CurBuilder;
+ MIB.setMBB(*SwitchBB);
+
+ // Subtract the minimum value.
+ Register SwitchOpReg = getOrCreateVReg(*B.SValue);
+
+ LLT SwitchOpTy = MRI->getType(SwitchOpReg);
+ Register MinValReg = MIB.buildConstant(SwitchOpTy, B.First).getReg(0);
+ auto RangeSub = MIB.buildSub(SwitchOpTy, SwitchOpReg, MinValReg);
+
+ // Ensure that the type will fit the mask value.
+ LLT MaskTy = SwitchOpTy;
+ for (unsigned I = 0, E = B.Cases.size(); I != E; ++I) {
+ if (!isUIntN(SwitchOpTy.getSizeInBits(), B.Cases[I].Mask)) {
+ // Switch table case ranges are encoded into a series of masks.
+ // Just use a 64-bit scalar; it is guaranteed to fit.
+ MaskTy = LLT::scalar(64);
+ break;
+ }
+ }
+ Register SubReg = RangeSub.getReg(0);
+ if (SwitchOpTy != MaskTy)
+ SubReg = MIB.buildZExtOrTrunc(MaskTy, SubReg).getReg(0);
+
+ B.RegVT = getMVTForLLT(MaskTy);
+ B.Reg = SubReg;
+
+ MachineBasicBlock *MBB = B.Cases[0].ThisBB;
+
+ if (!B.OmitRangeCheck)
+ addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
+ addSuccessorWithProb(SwitchBB, MBB, B.Prob);
+
+ SwitchBB->normalizeSuccProbs();
+
+ if (!B.OmitRangeCheck) {
+ // Conditional branch to the default block.
+ auto RangeCst = MIB.buildConstant(SwitchOpTy, B.Range);
+ auto RangeCmp = MIB.buildICmp(CmpInst::Predicate::ICMP_UGT, LLT::scalar(1),
+ RangeSub, RangeCst);
+ MIB.buildBrCond(RangeCmp, *B.Default);
+ }
+
+ // Avoid emitting unnecessary branches to the next block.
+ if (MBB != SwitchBB->getNextNode())
+ MIB.buildBr(*MBB);
+}
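
A minimal C++ sketch of the computation this header encodes, assuming a hypothetical switch already rebased on its smallest case value; this models the emitted G_SUB/G_ICMP/G_BRCOND sequence, not the builder code itself.

#include <cstdint>

// Hypothetical model of the emitted header: rebase, range-check, and hand
// the rebased value to the first bit-test block.
uint64_t bitTestHeader(uint64_t X, uint64_t First, uint64_t Range,
                       bool &GoesToDefault) {
  uint64_t Sub = X - First;    // G_SUB: rebase onto [0, Range]
  GoesToDefault = Sub > Range; // G_ICMP ugt + G_BRCOND to B.Default
  return Sub;                  // becomes B.Reg for emitBitTestCase
}
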
+
+void IRTranslator::emitBitTestCase(SwitchCG::BitTestBlock &BB,
+ MachineBasicBlock *NextMBB,
+ BranchProbability BranchProbToNext,
+ Register Reg, SwitchCG::BitTestCase &B,
+ MachineBasicBlock *SwitchBB) {
+ MachineIRBuilder &MIB = *CurBuilder;
+ MIB.setMBB(*SwitchBB);
+
+ LLT SwitchTy = getLLTForMVT(BB.RegVT);
+ Register Cmp;
+ unsigned PopCount = countPopulation(B.Mask);
+ if (PopCount == 1) {
+ // Testing for a single bit; just compare the shift count with what it
+ // would need to be to shift a 1 bit in that position.
+ auto MaskTrailingZeros =
+ MIB.buildConstant(SwitchTy, countTrailingZeros(B.Mask));
+ Cmp =
+ MIB.buildICmp(ICmpInst::ICMP_EQ, LLT::scalar(1), Reg, MaskTrailingZeros)
+ .getReg(0);
+ } else if (PopCount == BB.Range) {
+ // There is only one zero bit in the range, test for it directly.
+ auto MaskTrailingOnes =
+ MIB.buildConstant(SwitchTy, countTrailingOnes(B.Mask));
+ Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Reg, MaskTrailingOnes)
+ .getReg(0);
+ } else {
+ // Make desired shift.
+ auto CstOne = MIB.buildConstant(SwitchTy, 1);
+ auto SwitchVal = MIB.buildShl(SwitchTy, CstOne, Reg);
+
+ // Emit bit tests and jumps.
+ auto CstMask = MIB.buildConstant(SwitchTy, B.Mask);
+ auto AndOp = MIB.buildAnd(SwitchTy, SwitchVal, CstMask);
+ auto CstZero = MIB.buildConstant(SwitchTy, 0);
+ Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), AndOp, CstZero)
+ .getReg(0);
+ }
+
+ // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
+ addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
+ // The branch probability from SwitchBB to NextMBB is BranchProbToNext.
+ addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
+ // The sum of B.ExtraProb and BranchProbToNext is not guaranteed to be one;
+ // they are relative probabilities (and thus work more like weights), so we
+ // normalize them to make them sum to one.
+ SwitchBB->normalizeSuccProbs();
+
+ // Record the fact that the IR edge from the header to the bit test target
+ // will go through our new block. Needed for PHIs to have nodes added.
+ addMachineCFGPred({BB.Parent->getBasicBlock(), B.TargetBB->getBasicBlock()},
+ SwitchBB);
+
+ MIB.buildBrCond(Cmp, *B.TargetBB);
+
+ // Avoid emitting unnecessary branches to the next block.
+ if (NextMBB != SwitchBB->getNextNode())
+ MIB.buildBr(*NextMBB);
+}
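
The three compare strategies above, restated as a runnable C++ sketch over the rebased value; Mask is illustrative, and the middle (single-zero-bit) case is summarized in a comment to keep the sketch short.

#include <cstdint>
#include "llvm/Support/MathExtras.h"
using namespace llvm;

// Hypothetical model of one emitted bit-test case over the rebased value Reg.
bool bitTestCase(uint64_t Reg, uint64_t Mask) {
  if (countPopulation(Mask) == 1)           // a single case value: compare
    return Reg == countTrailingZeros(Mask); // the shift count directly
  // (If the mask covers all but one value of the range, the source instead
  // tests Reg != countTrailingOnes(Mask), i.e. the single hole.)
  // General form: shift a 1 into position Reg and test it against the mask.
  return ((uint64_t{1} << Reg) & Mask) != 0; // G_SHL + G_AND + G_ICMP ne
}
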
+
+bool IRTranslator::lowerBitTestWorkItem(
+ SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB,
+ MachineBasicBlock *CurMBB, MachineBasicBlock *DefaultMBB,
+ MachineIRBuilder &MIB, MachineFunction::iterator BBI,
+ BranchProbability DefaultProb, BranchProbability UnhandledProbs,
+ SwitchCG::CaseClusterIt I, MachineBasicBlock *Fallthrough,
+ bool FallthroughUnreachable) {
+ using namespace SwitchCG;
+ MachineFunction *CurMF = SwitchMBB->getParent();
+ // FIXME: Optimize away range check based on pivot comparisons.
+ BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex];
+ // The bit test blocks haven't been inserted yet; insert them here.
+ for (BitTestCase &BTC : BTB->Cases)
+ CurMF->insert(BBI, BTC.ThisBB);
+
+ // Fill in fields of the BitTestBlock.
+ BTB->Parent = CurMBB;
+ BTB->Default = Fallthrough;
+
+ BTB->DefaultProb = UnhandledProbs;
+ // If the bit-test cases don't form a contiguous range, evenly distribute
+ // the probability on the edge to Fallthrough between the two successors
+ // of CurMBB.
+ if (!BTB->ContiguousRange) {
+ BTB->Prob += DefaultProb / 2;
+ BTB->DefaultProb -= DefaultProb / 2;
+ }
+
+ if (FallthroughUnreachable) {
+ // Skip the range check if the fallthrough block is unreachable.
+ BTB->OmitRangeCheck = true;
+ }
+
+ // If we're in the right place, emit the bit test header right now.
+ if (CurMBB == SwitchMBB) {
+ emitBitTestHeader(*BTB, SwitchMBB);
+ BTB->Emitted = true;
+ }
+ return true;
+}
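
Worked numbers for the even split above (all values hypothetical): half of the default probability moves onto the header-to-first-test edge and half stays on the header-to-default edge.

#include "llvm/Support/BranchProbability.h"
using namespace llvm;

// Hypothetical inputs: the bit-test probability and the residual default.
void splitDefaultProb() {
  BranchProbability Prob(3, 4), UnhandledProbs(1, 4), DefaultProb(1, 4);
  Prob += DefaultProb / 2;           // header -> first test: 3/4 + 1/8 = 7/8
  UnhandledProbs -= DefaultProb / 2; // header -> default:    1/4 - 1/8 = 1/8
}
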
+
bool IRTranslator::lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W,
Value *Cond,
MachineBasicBlock *SwitchMBB,
@@ -794,9 +1209,15 @@ bool IRTranslator::lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W,
switch (I->Kind) {
case CC_BitTests: {
- LLVM_DEBUG(dbgs() << "Switch to bit test optimization unimplemented");
- return false; // Bit tests currently unimplemented.
+ if (!lowerBitTestWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI,
+ DefaultProb, UnhandledProbs, I, Fallthrough,
+ FallthroughUnreachable)) {
+ LLVM_DEBUG(dbgs() << "Failed to lower bit test for switch");
+ return false;
+ }
+ break;
}
+
case CC_JumpTable: {
if (!lowerJumpTableWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI,
UnhandledProbs, I, Fallthrough,
@@ -1137,16 +1558,33 @@ bool IRTranslator::translateGetElementPtr(const User &U,
bool IRTranslator::translateMemFunc(const CallInst &CI,
MachineIRBuilder &MIRBuilder,
- Intrinsic::ID ID) {
+ unsigned Opcode) {
// If the source is undef, then just emit a nop.
if (isa<UndefValue>(CI.getArgOperand(1)))
return true;
- ArrayRef<Register> Res;
- auto ICall = MIRBuilder.buildIntrinsic(ID, Res, true);
- for (auto AI = CI.arg_begin(), AE = CI.arg_end(); std::next(AI) != AE; ++AI)
- ICall.addUse(getOrCreateVReg(**AI));
+ SmallVector<Register, 3> SrcRegs;
+
+ unsigned MinPtrSize = UINT_MAX;
+ for (auto AI = CI.arg_begin(), AE = CI.arg_end(); std::next(AI) != AE; ++AI) {
+ Register SrcReg = getOrCreateVReg(**AI);
+ LLT SrcTy = MRI->getType(SrcReg);
+ if (SrcTy.isPointer())
+ MinPtrSize = std::min(SrcTy.getSizeInBits(), MinPtrSize);
+ SrcRegs.push_back(SrcReg);
+ }
+
+ LLT SizeTy = LLT::scalar(MinPtrSize);
+
+ // The size operand should be the minimum of the pointer sizes.
+ Register &SizeOpReg = SrcRegs[SrcRegs.size() - 1];
+ if (MRI->getType(SizeOpReg) != SizeTy)
+ SizeOpReg = MIRBuilder.buildZExtOrTrunc(SizeTy, SizeOpReg).getReg(0);
+
+ auto ICall = MIRBuilder.buildInstr(Opcode);
+ for (Register SrcReg : SrcRegs)
+ ICall.addUse(SrcReg);
Align DstAlign;
Align SrcAlign;
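
Why the minimum (a sketch; the two address-space widths below are hypothetical): a copy between a 64-bit and a 32-bit pointer can only be as long as the narrower pointer can address, so the length operand is carried at that width.

#include <algorithm>
#include <initializer_list>

// Hypothetical helper mirroring the MinPtrSize computation above.
unsigned sizeOperandWidth(std::initializer_list<unsigned> PtrWidths) {
  return std::min(PtrWidths); // e.g. {64, 32} -> 32, so an s64 length is
                              // narrowed to s32 before the G_MEMCPY
}
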
@@ -1175,7 +1613,7 @@ bool IRTranslator::translateMemFunc(const CallInst &CI,
ICall.addMemOperand(MF->getMachineMemOperand(
MachinePointerInfo(CI.getArgOperand(0)),
MachineMemOperand::MOStore | VolFlag, 1, DstAlign));
- if (ID != Intrinsic::memset)
+ if (Opcode != TargetOpcode::G_MEMSET)
ICall.addMemOperand(MF->getMachineMemOperand(
MachinePointerInfo(CI.getArgOperand(1)),
MachineMemOperand::MOLoad | VolFlag, 1, SrcAlign));
@@ -1214,6 +1652,16 @@ bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
return true;
}
+bool IRTranslator::translateFixedPointIntrinsic(unsigned Op, const CallInst &CI,
+ MachineIRBuilder &MIRBuilder) {
+ Register Dst = getOrCreateVReg(CI);
+ Register Src0 = getOrCreateVReg(*CI.getOperand(0));
+ Register Src1 = getOrCreateVReg(*CI.getOperand(1));
+ uint64_t Scale = cast<ConstantInt>(CI.getOperand(2))->getZExtValue();
+ MIRBuilder.buildInstr(Op, {Dst}, { Src0, Src1, Scale });
+ return true;
+}
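
For orientation, the fixed-point multiply these opcodes model computes the product at double width and shifts it right by the scale (the third, immediate operand above). This hypothetical helper shows only the scalar semantics, not the legalizer's lowering.

#include <cstdint>

// smul.fix semantics sketch: double-width multiply, arithmetic shift right.
int32_t smulFix(int32_t A, int32_t B, unsigned Scale) {
  return (int32_t)(((int64_t)A * B) >> Scale);
}
// e.g. with Scale = 16 (Q16.16), smulFix(3 << 16, 2 << 16, 16) == 6 << 16.
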
+
unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
switch (ID) {
default:
@@ -1264,10 +1712,14 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_FNEARBYINT;
case Intrinsic::pow:
return TargetOpcode::G_FPOW;
+ case Intrinsic::powi:
+ return TargetOpcode::G_FPOWI;
case Intrinsic::rint:
return TargetOpcode::G_FRINT;
case Intrinsic::round:
return TargetOpcode::G_INTRINSIC_ROUND;
+ case Intrinsic::roundeven:
+ return TargetOpcode::G_INTRINSIC_ROUNDEVEN;
case Intrinsic::sin:
return TargetOpcode::G_FSIN;
case Intrinsic::sqrt:
@@ -1278,6 +1730,31 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_READCYCLECOUNTER;
case Intrinsic::ptrmask:
return TargetOpcode::G_PTRMASK;
+ case Intrinsic::lrint:
+ return TargetOpcode::G_INTRINSIC_LRINT;
+ // FADD/FMUL require checking the FMF, so are handled elsewhere.
+ case Intrinsic::vector_reduce_fmin:
+ return TargetOpcode::G_VECREDUCE_FMIN;
+ case Intrinsic::vector_reduce_fmax:
+ return TargetOpcode::G_VECREDUCE_FMAX;
+ case Intrinsic::vector_reduce_add:
+ return TargetOpcode::G_VECREDUCE_ADD;
+ case Intrinsic::vector_reduce_mul:
+ return TargetOpcode::G_VECREDUCE_MUL;
+ case Intrinsic::vector_reduce_and:
+ return TargetOpcode::G_VECREDUCE_AND;
+ case Intrinsic::vector_reduce_or:
+ return TargetOpcode::G_VECREDUCE_OR;
+ case Intrinsic::vector_reduce_xor:
+ return TargetOpcode::G_VECREDUCE_XOR;
+ case Intrinsic::vector_reduce_smax:
+ return TargetOpcode::G_VECREDUCE_SMAX;
+ case Intrinsic::vector_reduce_smin:
+ return TargetOpcode::G_VECREDUCE_SMIN;
+ case Intrinsic::vector_reduce_umax:
+ return TargetOpcode::G_VECREDUCE_UMAX;
+ case Intrinsic::vector_reduce_umin:
+ return TargetOpcode::G_VECREDUCE_UMIN;
}
return Intrinsic::not_intrinsic;
}
@@ -1370,7 +1847,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
// Get the underlying objects for the location passed on the lifetime
// marker.
SmallVector<const Value *, 4> Allocas;
- GetUnderlyingObjects(CI.getArgOperand(1), Allocas, *DL);
+ getUnderlyingObjects(CI.getArgOperand(1), Allocas);
// Iterate over each underlying object, creating lifetime markers for each
// static alloca. Quit if we find a non-static alloca.
@@ -1484,6 +1961,37 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return translateBinaryOp(TargetOpcode::G_USUBSAT, CI, MIRBuilder);
case Intrinsic::ssub_sat:
return translateBinaryOp(TargetOpcode::G_SSUBSAT, CI, MIRBuilder);
+ case Intrinsic::ushl_sat:
+ return translateBinaryOp(TargetOpcode::G_USHLSAT, CI, MIRBuilder);
+ case Intrinsic::sshl_sat:
+ return translateBinaryOp(TargetOpcode::G_SSHLSAT, CI, MIRBuilder);
+ case Intrinsic::umin:
+ return translateBinaryOp(TargetOpcode::G_UMIN, CI, MIRBuilder);
+ case Intrinsic::umax:
+ return translateBinaryOp(TargetOpcode::G_UMAX, CI, MIRBuilder);
+ case Intrinsic::smin:
+ return translateBinaryOp(TargetOpcode::G_SMIN, CI, MIRBuilder);
+ case Intrinsic::smax:
+ return translateBinaryOp(TargetOpcode::G_SMAX, CI, MIRBuilder);
+ case Intrinsic::abs:
+ // TODO: Preserve "int min is poison" arg in GMIR?
+ return translateUnaryOp(TargetOpcode::G_ABS, CI, MIRBuilder);
+ case Intrinsic::smul_fix:
+ return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIX, CI, MIRBuilder);
+ case Intrinsic::umul_fix:
+ return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIX, CI, MIRBuilder);
+ case Intrinsic::smul_fix_sat:
+ return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIXSAT, CI, MIRBuilder);
+ case Intrinsic::umul_fix_sat:
+ return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIXSAT, CI, MIRBuilder);
+ case Intrinsic::sdiv_fix:
+ return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIX, CI, MIRBuilder);
+ case Intrinsic::udiv_fix:
+ return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIX, CI, MIRBuilder);
+ case Intrinsic::sdiv_fix_sat:
+ return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIXSAT, CI, MIRBuilder);
+ case Intrinsic::udiv_fix_sat:
+ return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIXSAT, CI, MIRBuilder);
case Intrinsic::fmuladd: {
const TargetMachine &TM = MF->getTarget();
const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
@@ -1507,10 +2015,24 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
}
return true;
}
+ case Intrinsic::convert_from_fp16:
+ // FIXME: This intrinsic should probably be removed from the IR.
+ MIRBuilder.buildFPExt(getOrCreateVReg(CI),
+ getOrCreateVReg(*CI.getArgOperand(0)),
+ MachineInstr::copyFlagsFromInstruction(CI));
+ return true;
+ case Intrinsic::convert_to_fp16:
+ // FIXME: This intrinsic should probably be removed from the IR.
+ MIRBuilder.buildFPTrunc(getOrCreateVReg(CI),
+ getOrCreateVReg(*CI.getArgOperand(0)),
+ MachineInstr::copyFlagsFromInstruction(CI));
+ return true;
case Intrinsic::memcpy:
+ return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY);
case Intrinsic::memmove:
+ return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMMOVE);
case Intrinsic::memset:
- return translateMemFunc(CI, MIRBuilder, ID);
+ return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMSET);
case Intrinsic::eh_typeid_for: {
GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0));
Register Reg = getOrCreateVReg(CI);
@@ -1593,7 +2115,18 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
}
case Intrinsic::invariant_end:
return true;
+ case Intrinsic::expect:
+ case Intrinsic::annotation:
+ case Intrinsic::ptr_annotation:
+ case Intrinsic::launder_invariant_group:
+ case Intrinsic::strip_invariant_group: {
+ // Drop the intrinsic, but forward the value.
+ MIRBuilder.buildCopy(getOrCreateVReg(CI),
+ getOrCreateVReg(*CI.getArgOperand(0)));
+ return true;
+ }
case Intrinsic::assume:
+ case Intrinsic::experimental_noalias_scope_decl:
case Intrinsic::var_annotation:
case Intrinsic::sideeffect:
// Discard annotate attributes, assumptions, and artificial side-effects.
@@ -1613,6 +2146,68 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
.addUse(getOrCreateVReg(*CI.getArgOperand(1)));
return true;
}
+ case Intrinsic::localescape: {
+ MachineBasicBlock &EntryMBB = MF->front();
+ StringRef EscapedName = GlobalValue::dropLLVMManglingEscape(MF->getName());
+
+ // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
+ // is the same on all targets.
+ for (unsigned Idx = 0, E = CI.getNumArgOperands(); Idx < E; ++Idx) {
+ Value *Arg = CI.getArgOperand(Idx)->stripPointerCasts();
+ if (isa<ConstantPointerNull>(Arg))
+ continue; // Skip null pointers. They represent a hole in index space.
+
+ int FI = getOrCreateFrameIndex(*cast<AllocaInst>(Arg));
+ MCSymbol *FrameAllocSym =
+ MF->getMMI().getContext().getOrCreateFrameAllocSymbol(EscapedName,
+ Idx);
+
+ // This should be inserted at the start of the entry block.
+ auto LocalEscape =
+ MIRBuilder.buildInstrNoInsert(TargetOpcode::LOCAL_ESCAPE)
+ .addSym(FrameAllocSym)
+ .addFrameIndex(FI);
+
+ EntryMBB.insert(EntryMBB.begin(), LocalEscape);
+ }
+
+ return true;
+ }
+ case Intrinsic::vector_reduce_fadd:
+ case Intrinsic::vector_reduce_fmul: {
+ // Need to check for the reassoc flag to decide whether we want a
+ // sequential reduction opcode or not.
+ Register Dst = getOrCreateVReg(CI);
+ Register ScalarSrc = getOrCreateVReg(*CI.getArgOperand(0));
+ Register VecSrc = getOrCreateVReg(*CI.getArgOperand(1));
+ unsigned Opc = 0;
+ if (!CI.hasAllowReassoc()) {
+ // The sequential ordering case.
+ Opc = ID == Intrinsic::vector_reduce_fadd
+ ? TargetOpcode::G_VECREDUCE_SEQ_FADD
+ : TargetOpcode::G_VECREDUCE_SEQ_FMUL;
+ MIRBuilder.buildInstr(Opc, {Dst}, {ScalarSrc, VecSrc},
+ MachineInstr::copyFlagsFromInstruction(CI));
+ return true;
+ }
+ // We split the operation into a separate G_FADD/G_FMUL + the reduce,
+ // since the associativity doesn't matter.
+ unsigned ScalarOpc;
+ if (ID == Intrinsic::vector_reduce_fadd) {
+ Opc = TargetOpcode::G_VECREDUCE_FADD;
+ ScalarOpc = TargetOpcode::G_FADD;
+ } else {
+ Opc = TargetOpcode::G_VECREDUCE_FMUL;
+ ScalarOpc = TargetOpcode::G_FMUL;
+ }
+ LLT DstTy = MRI->getType(Dst);
+ auto Rdx = MIRBuilder.buildInstr(
+ Opc, {DstTy}, {VecSrc}, MachineInstr::copyFlagsFromInstruction(CI));
+ MIRBuilder.buildInstr(ScalarOpc, {Dst}, {ScalarSrc, Rdx},
+ MachineInstr::copyFlagsFromInstruction(CI));
+
+ return true;
+ }
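
A scalar model of the reassoc path above (hypothetical helper): reduce the vector on its own, then fold the start value in with one extra fadd; the allow-reassoc flag is what makes this reordering legal.

#include <cstddef>

// Hypothetical model: G_VECREDUCE_FADD over V, then a separate G_FADD.
float reduceFAddReassoc(float Start, const float *V, size_t N) {
  float Rdx = 0.0f;
  for (size_t I = 0; I != N; ++I)
    Rdx += V[I];      // the reduction, in some target-chosen order
  return Start + Rdx; // the split-out scalar G_FADD
}
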
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
@@ -1722,10 +2317,6 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
MIB->copyIRFlags(CI);
for (auto &Arg : enumerate(CI.arg_operands())) {
- // Some intrinsics take metadata parameters. Reject them.
- if (isa<MetadataAsValue>(Arg.value()))
- return false;
-
// If this is required to be an immediate, don't materialize it in a
// register.
if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) {
@@ -1738,6 +2329,11 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
} else {
MIB.addFPImm(cast<ConstantFP>(Arg.value()));
}
+ } else if (auto MD = dyn_cast<MetadataAsValue>(Arg.value())) {
+ auto *MDN = dyn_cast<MDNode>(MD->getMetadata());
+ if (!MDN) // This was probably an MDString.
+ return false;
+ MIB.addMetadata(MDN);
} else {
ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg.value());
if (VRegs.size() > 1)
@@ -1762,6 +2358,62 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
return true;
}
+bool IRTranslator::findUnwindDestinations(
+ const BasicBlock *EHPadBB,
+ BranchProbability Prob,
+ SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
+ &UnwindDests) {
+ EHPersonality Personality = classifyEHPersonality(
+ EHPadBB->getParent()->getFunction().getPersonalityFn());
+ bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
+ bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
+ bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
+ bool IsSEH = isAsynchronousEHPersonality(Personality);
+
+ if (IsWasmCXX) {
+ // Ignore this for now.
+ return false;
+ }
+
+ while (EHPadBB) {
+ const Instruction *Pad = EHPadBB->getFirstNonPHI();
+ BasicBlock *NewEHPadBB = nullptr;
+ if (isa<LandingPadInst>(Pad)) {
+ // Stop on landingpads. They are not funclets.
+ UnwindDests.emplace_back(&getMBB(*EHPadBB), Prob);
+ break;
+ }
+ if (isa<CleanupPadInst>(Pad)) {
+ // Stop on cleanup pads. Cleanups are always funclet entries for all known
+ // personalities.
+ UnwindDests.emplace_back(&getMBB(*EHPadBB), Prob);
+ UnwindDests.back().first->setIsEHScopeEntry();
+ UnwindDests.back().first->setIsEHFuncletEntry();
+ break;
+ }
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
+ // Add the catchpad handlers to the possible destinations.
+ for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
+ UnwindDests.emplace_back(&getMBB(*CatchPadBB), Prob);
+ // For MSVC++ and the CLR, catchblocks are funclets and need prologues.
+ if (IsMSVCCXX || IsCoreCLR)
+ UnwindDests.back().first->setIsEHFuncletEntry();
+ if (!IsSEH)
+ UnwindDests.back().first->setIsEHScopeEntry();
+ }
+ NewEHPadBB = CatchSwitch->getUnwindDest();
+ } else {
+ continue;
+ }
+
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ if (BPI && NewEHPadBB)
+ Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
+ EHPadBB = NewEHPadBB;
+ }
+ return true;
+}
+
bool IRTranslator::translateInvoke(const User &U,
MachineIRBuilder &MIRBuilder) {
const InvokeInst &I = cast<InvokeInst>(U);
@@ -1787,7 +2439,7 @@ bool IRTranslator::translateInvoke(const User &U,
return false;
// FIXME: support Windows exception handling.
- if (!isa<LandingPadInst>(EHPadBB->front()))
+ if (!isa<LandingPadInst>(EHPadBB->getFirstNonPHI()))
return false;
// Emit the actual call, bracketed by EH_LABELs so that the MF knows about
@@ -1801,14 +2453,28 @@ bool IRTranslator::translateInvoke(const User &U,
MCSymbol *EndSymbol = Context.createTempSymbol();
MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol);
- // FIXME: track probabilities.
+ SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ MachineBasicBlock *InvokeMBB = &MIRBuilder.getMBB();
+ BranchProbability EHPadBBProb =
+ BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
+ : BranchProbability::getZero();
+
+ if (!findUnwindDestinations(EHPadBB, EHPadBBProb, UnwindDests))
+ return false;
+
MachineBasicBlock &EHPadMBB = getMBB(*EHPadBB),
&ReturnMBB = getMBB(*ReturnBB);
+ // Update successor info.
+ addSuccessorWithProb(InvokeMBB, &ReturnMBB);
+ for (auto &UnwindDest : UnwindDests) {
+ UnwindDest.first->setIsEHPad();
+ addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
+ }
+ InvokeMBB->normalizeSuccProbs();
+
MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol);
- MIRBuilder.getMBB().addSuccessor(&ReturnMBB);
- MIRBuilder.getMBB().addSuccessor(&EHPadMBB);
MIRBuilder.buildBr(ReturnMBB);
-
return true;
}
@@ -1846,6 +2512,12 @@ bool IRTranslator::translateLandingPad(const User &U,
MIRBuilder.buildInstr(TargetOpcode::EH_LABEL)
.addSym(MF->addLandingPad(&MBB));
+ // If the unwinder does not preserve all registers, ensure that the
+ // function marks the clobbered registers as used.
+ const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
+ if (auto *RegMask = TRI.getCustomEHPadPreservedMask(*MF))
+ MF->getRegInfo().addPhysRegsUsedFromRegMask(RegMask);
+
LLT Ty = getLLTForType(*LP.getType(), *DL);
Register Undef = MRI->createGenericVirtualRegister(Ty);
MIRBuilder.buildUndef(Undef);
@@ -2184,8 +2856,8 @@ bool IRTranslator::translate(const Instruction &Inst) {
// We only emit constants into the entry block from here. To prevent jumpy
// debug behaviour, set the line to 0.
if (const DebugLoc &DL = Inst.getDebugLoc())
- EntryBuilder->setDebugLoc(
- DebugLoc::get(0, 0, DL.getScope(), DL.getInlinedAt()));
+ EntryBuilder->setDebugLoc(DILocation::get(
+ Inst.getContext(), 0, 0, DL.getScope(), DL.getInlinedAt()));
else
EntryBuilder->setDebugLoc(DebugLoc());
@@ -2263,6 +2935,57 @@ bool IRTranslator::translate(const Constant &C, Register Reg) {
}
void IRTranslator::finalizeBasicBlock() {
+ for (auto &BTB : SL->BitTestCases) {
+ // Emit header first, if it wasn't already emitted.
+ if (!BTB.Emitted)
+ emitBitTestHeader(BTB, BTB.Parent);
+
+ BranchProbability UnhandledProb = BTB.Prob;
+ for (unsigned j = 0, ej = BTB.Cases.size(); j != ej; ++j) {
+ UnhandledProb -= BTB.Cases[j].ExtraProb;
+ // Set the current basic block to the MBB we wish to insert the code into.
+ MachineBasicBlock *MBB = BTB.Cases[j].ThisBB;
+ // If all cases cover a contiguous range, it is not necessary to jump to
+ // the default block after the last bit test fails. This is because the
+ // range check during bit-test header creation has guaranteed that no case
+ // here goes outside the range. So the last bit test is redundant: it would
+ // always be true. Instead, make
+ // the second-to-last bit-test fall through to the target of the last bit
+ // test, and delete the last bit test.
+
+ MachineBasicBlock *NextMBB;
+ if (BTB.ContiguousRange && j + 2 == ej) {
+ // Second-to-last bit-test with contiguous range: fall through to the
+ // target of the final bit test.
+ NextMBB = BTB.Cases[j + 1].TargetBB;
+ } else if (j + 1 == ej) {
+ // For the last bit test, fall through to Default.
+ NextMBB = BTB.Default;
+ } else {
+ // Otherwise, fall through to the next bit test.
+ NextMBB = BTB.Cases[j + 1].ThisBB;
+ }
+
+ emitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j], MBB);
+
+ // FIXME delete this block below?
+ if (BTB.ContiguousRange && j + 2 == ej) {
+ // Since we're not going to use the final bit test, remove it.
+ BTB.Cases.pop_back();
+ break;
+ }
+ }
+ // This is the "default" BB. There are two jumps to it: from the "header"
+ // BB and from the last "case" BB, unless the latter was skipped.
+ CFGEdge HeaderToDefaultEdge = {BTB.Parent->getBasicBlock(),
+ BTB.Default->getBasicBlock()};
+ addMachineCFGPred(HeaderToDefaultEdge, BTB.Parent);
+ if (!BTB.ContiguousRange) {
+ addMachineCFGPred(HeaderToDefaultEdge, BTB.Cases.back().ThisBB);
+ }
+ }
+ SL->BitTestCases.clear();
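
To see why the last test is removable for a contiguous range, here is a hypothetical exhaustive check with the cases rebased to {0..3}, so Mask = 0b1111: once the header has bounded Sub, the final bit is always found set.

#include <cstdint>

// With Sub already proved to be in [0, 3] by the header's range check, the
// test "(1 << Sub) & 0b1111" can never fail, so the case can be dropped.
bool lastBitTestIsTautology() {
  for (uint64_t Sub = 0; Sub <= 3; ++Sub)
    if (((uint64_t{1} << Sub) & 0b1111u) == 0)
      return false;
  return true;
}
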
+
for (auto &JTCase : SL->JTCases) {
// Emit header first, if it wasn't already emitted.
if (!JTCase.first.Emitted)
@@ -2271,6 +2994,10 @@ void IRTranslator::finalizeBasicBlock() {
emitJumpTable(JTCase.second, JTCase.second.MBB);
}
SL->JTCases.clear();
+
+ for (auto &SwCase : SL->SwitchCases)
+ emitSwitchCase(SwCase, &CurBuilder->getMBB(), *CurBuilder);
+ SL->SwitchCases.clear();
}
void IRTranslator::finalizeFunction() {
@@ -2332,14 +3059,23 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
MRI = &MF->getRegInfo();
DL = &F.getParent()->getDataLayout();
ORE = std::make_unique<OptimizationRemarkEmitter>(&F);
+ const TargetMachine &TM = MF->getTarget();
+ TM.resetTargetOptions(F);
+ EnableOpts = OptLevel != CodeGenOpt::None && !skipFunction(F);
FuncInfo.MF = MF;
- FuncInfo.BPI = nullptr;
+ if (EnableOpts)
+ FuncInfo.BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
+ else
+ FuncInfo.BPI = nullptr;
+
+ FuncInfo.CanLowerReturn = CLI->checkReturnTypeForCallConv(*MF);
+
const auto &TLI = *MF->getSubtarget().getTargetLowering();
- const TargetMachine &TM = MF->getTarget();
+
SL = std::make_unique<GISelSwitchLowering>(this, FuncInfo);
SL->init(TLI, TM, *DL);
- EnableOpts = TM.getOptLevel() != CodeGenOpt::None && !skipFunction(F);
+
assert(PendingPHIs.empty() && "stale PHIs");
@@ -2407,7 +3143,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
}
}
- if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs)) {
+ if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs, FuncInfo)) {
OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
F.getSubprogram(), &F.getEntryBlock());
R << "unable to lower arguments: " << ore::NV("Prototype", F.getType());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
index 1e2a82615da8..bb4d41cfd69f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -562,6 +562,11 @@ bool InlineAsmLowering::lowerInlineAsm(
}
unsigned Flag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, NumRegs);
+ if (OpInfo.Regs.front().isVirtual()) {
+ // Put the register class of the virtual registers in the flag word.
+ const TargetRegisterClass *RC = MRI->getRegClass(OpInfo.Regs.front());
+ Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
+ }
Inst.addImm(Flag);
if (!buildAnyextOrCopy(OpInfo.Regs[0], SourceRegs[0], MIRBuilder))
return false;
@@ -657,6 +662,7 @@ bool InlineAsmLowering::lowerAsmOperandForConstraint(
default:
return false;
case 'i': // Simple Integer or Relocatable Constant
+ case 'n': // immediate integer with a known value.
if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
assert(CI->getBitWidth() <= 64 &&
"expected immediate to fit into 64-bits");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index f32278d07052..25fae5487187 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -41,7 +41,7 @@ static cl::opt<std::string>
cl::desc("Record GlobalISel rule coverage files of this "
"prefix if instrumentation was generated"));
#else
-static const std::string CoveragePrefix = "";
+static const std::string CoveragePrefix;
#endif
char InstructionSelect::ID = 0;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 2fedc034d315..4fec9e628ddb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -33,24 +33,12 @@ InstructionSelector::MatcherState::MatcherState(unsigned MaxRenderers)
InstructionSelector::InstructionSelector() = default;
-bool InstructionSelector::constrainOperandRegToRegClass(
- MachineInstr &I, unsigned OpIdx, const TargetRegisterClass &RC,
- const TargetInstrInfo &TII, const TargetRegisterInfo &TRI,
- const RegisterBankInfo &RBI) const {
- MachineBasicBlock &MBB = *I.getParent();
- MachineFunction &MF = *MBB.getParent();
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
- return constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, RC,
- I.getOperand(OpIdx));
-}
-
bool InstructionSelector::isOperandImmEqual(
const MachineOperand &MO, int64_t Value,
const MachineRegisterInfo &MRI) const {
if (MO.isReg() && MO.getReg())
if (auto VRegVal = getConstantVRegValWithLookThrough(MO.getReg(), MRI))
- return VRegVal->Value == Value;
+ return VRegVal->Value.getSExtValue() == Value;
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
index a83742f2138f..1993f6033291 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -10,6 +10,17 @@
//
//===----------------------------------------------------------------------===//
+// Enable optimizations to work around MSVC debug mode bug in 32-bit:
+// https://developercommunity.visualstudio.com/content/problem/1179643/msvc-copies-overaligned-non-trivially-copyable-par.html
+// FIXME: Remove this when the issue is closed.
+#if defined(_MSC_VER) && !defined(__clang__) && defined(_M_IX86)
+// We have to disable runtime checks in order to enable optimizations. This is
+// done for the entire file because the problem is actually observed in STL
+// template functions.
+#pragma runtime_checks("", off)
+#pragma optimize("gs", on)
+#endif
+
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
using namespace llvm;
@@ -24,7 +35,7 @@ LegalityPredicates::typeInSet(unsigned TypeIdx,
std::initializer_list<LLT> TypesInit) {
SmallVector<LLT, 4> Types = TypesInit;
return [=](const LegalityQuery &Query) {
- return std::find(Types.begin(), Types.end(), Query.Types[TypeIdx]) != Types.end();
+ return llvm::is_contained(Types, Query.Types[TypeIdx]);
};
}
@@ -34,7 +45,7 @@ LegalityPredicate LegalityPredicates::typePairInSet(
SmallVector<std::pair<LLT, LLT>, 4> Types = TypesInit;
return [=](const LegalityQuery &Query) {
std::pair<LLT, LLT> Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1]};
- return std::find(Types.begin(), Types.end(), Match) != Types.end();
+ return llvm::is_contained(Types, Match);
};
}
@@ -46,11 +57,10 @@ LegalityPredicate LegalityPredicates::typePairAndMemDescInSet(
TypePairAndMemDesc Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1],
Query.MMODescrs[MMOIdx].SizeInBits,
Query.MMODescrs[MMOIdx].AlignInBits};
- return std::find_if(
- TypesAndMemDesc.begin(), TypesAndMemDesc.end(),
- [=](const TypePairAndMemDesc &Entry) ->bool {
- return Match.isCompatible(Entry);
- }) != TypesAndMemDesc.end();
+ return llvm::any_of(TypesAndMemDesc,
+ [=](const TypePairAndMemDesc &Entry) -> bool {
+ return Match.isCompatible(Entry);
+ });
};
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
index fcbecf90a845..f3ba3f080198 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
@@ -43,6 +43,16 @@ LegalizeMutation LegalizeMutations::changeElementTo(unsigned TypeIdx,
};
}
+LegalizeMutation LegalizeMutations::changeElementSizeTo(unsigned TypeIdx,
+ unsigned FromTypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT OldTy = Query.Types[TypeIdx];
+ const LLT NewTy = Query.Types[FromTypeIdx];
+ const LLT NewEltTy = LLT::scalar(NewTy.getScalarSizeInBits());
+ return std::make_pair(TypeIdx, OldTy.changeElementType(NewEltTy));
+ };
+}
+
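
A plausible use of the new mutation inside a hypothetical target's LegalizerInfo constructor (the opcode and predicate are illustrative, not from this patch): grow type 0's elements until they match type 1's scalar size.

// Hypothetical rule sketch; assumes the usual LegalizerInfo setup.
getActionDefinitionsBuilder(TargetOpcode::G_FPTOSI)
    .widenScalarIf(
        [](const LegalityQuery &Q) {
          return Q.Types[0].getScalarSizeInBits() <
                 Q.Types[1].getScalarSizeInBits();
        },
        LegalizeMutations::changeElementSizeTo(0, 1));
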
LegalizeMutation LegalizeMutations::widenScalarOrEltToNextPow2(unsigned TypeIdx,
unsigned Min) {
return [=](const LegalityQuery &Query) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
index 1d7be54de3b0..5ba9367cac8a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -284,7 +284,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
WrapperObserver)) {
WorkListObserver.printNewInstrs();
for (auto *DeadMI : DeadInstructions) {
- LLVM_DEBUG(dbgs() << *DeadMI << "Is dead\n");
+ LLVM_DEBUG(dbgs() << "Is dead: " << *DeadMI);
RemoveDeadInstFromLists(DeadMI);
DeadMI->eraseFromParentAndMarkDBGValuesForRemoval();
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 244e7a9583d6..e7f40523efaf 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -16,6 +16,7 @@
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -29,6 +30,7 @@
using namespace llvm;
using namespace LegalizeActions;
+using namespace MIPatternMatch;
/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
///
@@ -75,6 +77,8 @@ static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
return Type::getFloatTy(Ctx);
case 64:
return Type::getDoubleTy(Ctx);
+ case 80:
+ return Type::getX86_FP80Ty(Ctx);
case 128:
return Type::getFP128Ty(Ctx);
default:
@@ -86,16 +90,15 @@ LegalizerHelper::LegalizerHelper(MachineFunction &MF,
GISelChangeObserver &Observer,
MachineIRBuilder &Builder)
: MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
- LI(*MF.getSubtarget().getLegalizerInfo()) {
- MIRBuilder.setChangeObserver(Observer);
-}
+ LI(*MF.getSubtarget().getLegalizerInfo()),
+ TLI(*MF.getSubtarget().getTargetLowering()) { }
LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
GISelChangeObserver &Observer,
MachineIRBuilder &B)
- : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI) {
- MIRBuilder.setChangeObserver(Observer);
-}
+ : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
+ TLI(*MF.getSubtarget().getTargetLowering()) { }
+
LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
@@ -237,22 +240,21 @@ void LegalizerHelper::insertParts(Register DstReg,
}
}
-/// Return the result registers of G_UNMERGE_VALUES \p MI in \p Regs
+/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
const MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
+ const int StartIdx = Regs.size();
const int NumResults = MI.getNumOperands() - 1;
- Regs.resize(NumResults);
+ Regs.resize(Regs.size() + NumResults);
for (int I = 0; I != NumResults; ++I)
- Regs[I] = MI.getOperand(I).getReg();
+ Regs[StartIdx + I] = MI.getOperand(I).getReg();
}
-LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
- LLT NarrowTy, Register SrcReg) {
+void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
+ LLT GCDTy, Register SrcReg) {
LLT SrcTy = MRI.getType(SrcReg);
-
- LLT GCDTy = getGCDType(DstTy, getGCDType(SrcTy, NarrowTy));
if (SrcTy == GCDTy) {
// If the source already evenly divides the result type, we don't need to do
// anything.
@@ -262,7 +264,13 @@ LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
getUnmergeResults(Parts, *Unmerge);
}
+}
+LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
+ LLT NarrowTy, Register SrcReg) {
+ LLT SrcTy = MRI.getType(SrcReg);
+ LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
+ extractGCDType(Parts, GCDTy, SrcReg);
return GCDTy;
}
@@ -376,7 +384,14 @@ void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
}
if (LCMTy.isVector()) {
- MIRBuilder.buildExtract(DstReg, Remerge, 0);
+ unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
+ SmallVector<Register, 8> UnmergeDefs(NumDefs);
+ UnmergeDefs[0] = DstReg;
+ for (unsigned I = 1; I != NumDefs; ++I)
+ UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
+
+ MIRBuilder.buildUnmerge(UnmergeDefs,
+ MIRBuilder.buildMerge(LCMTy, RemergeRegs));
return;
}
@@ -384,7 +399,7 @@ void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
}
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
-#define RTLIBCASE(LibcallPrefix) \
+#define RTLIBCASE_INT(LibcallPrefix) \
do { \
switch (Size) { \
case 32: \
@@ -398,19 +413,33 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
} \
} while (0)
- assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
+#define RTLIBCASE(LibcallPrefix) \
+ do { \
+ switch (Size) { \
+ case 32: \
+ return RTLIB::LibcallPrefix##32; \
+ case 64: \
+ return RTLIB::LibcallPrefix##64; \
+ case 80: \
+ return RTLIB::LibcallPrefix##80; \
+ case 128: \
+ return RTLIB::LibcallPrefix##128; \
+ default: \
+ llvm_unreachable("unexpected size"); \
+ } \
+ } while (0)
switch (Opcode) {
case TargetOpcode::G_SDIV:
- RTLIBCASE(SDIV_I);
+ RTLIBCASE_INT(SDIV_I);
case TargetOpcode::G_UDIV:
- RTLIBCASE(UDIV_I);
+ RTLIBCASE_INT(UDIV_I);
case TargetOpcode::G_SREM:
- RTLIBCASE(SREM_I);
+ RTLIBCASE_INT(SREM_I);
case TargetOpcode::G_UREM:
- RTLIBCASE(UREM_I);
+ RTLIBCASE_INT(UREM_I);
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
- RTLIBCASE(CTLZ_I);
+ RTLIBCASE_INT(CTLZ_I);
case TargetOpcode::G_FADD:
RTLIBCASE(ADD_F);
case TargetOpcode::G_FSUB:
@@ -453,13 +482,16 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
RTLIBCASE(RINT_F);
case TargetOpcode::G_FNEARBYINT:
RTLIBCASE(NEARBYINT_F);
+ case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
+ RTLIBCASE(ROUNDEVEN_F);
}
llvm_unreachable("Unknown libcall function");
}
/// True if an instruction is in tail position in its caller. Intended for
/// legalizing libcalls as tail calls when possible.
-static bool isLibCallInTailPosition(MachineInstr &MI) {
+static bool isLibCallInTailPosition(const TargetInstrInfo &TII,
+ MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
const Function &F = MBB.getParent()->getFunction();
@@ -479,7 +511,6 @@ static bool isLibCallInTailPosition(MachineInstr &MI) {
return false;
// Only tail call if the following instruction is a standard return.
- auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
return false;
@@ -531,12 +562,11 @@ simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
LegalizerHelper::LegalizeResult
llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
MachineInstr &MI) {
- assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
SmallVector<CallLowering::ArgInfo, 3> Args;
// Add all the args, except for the last which is an imm denoting 'tail'.
- for (unsigned i = 1; i < MI.getNumOperands() - 1; i++) {
+ for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
Register Reg = MI.getOperand(i).getReg();
// Need to derive an IR type for call lowering.
@@ -551,31 +581,28 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
- Intrinsic::ID ID = MI.getOperand(0).getIntrinsicID();
RTLIB::Libcall RTLibcall;
- switch (ID) {
- case Intrinsic::memcpy:
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_MEMCPY:
RTLibcall = RTLIB::MEMCPY;
break;
- case Intrinsic::memset:
- RTLibcall = RTLIB::MEMSET;
- break;
- case Intrinsic::memmove:
+ case TargetOpcode::G_MEMMOVE:
RTLibcall = RTLIB::MEMMOVE;
break;
+ case TargetOpcode::G_MEMSET:
+ RTLibcall = RTLIB::MEMSET;
+ break;
default:
return LegalizerHelper::UnableToLegalize;
}
const char *Name = TLI.getLibcallName(RTLibcall);
- MIRBuilder.setInstrAndDebugLoc(MI);
-
CallLowering::CallLoweringInfo Info;
Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
Info.Callee = MachineOperand::CreateES(Name);
Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
- Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() == 1 &&
- isLibCallInTailPosition(MI);
+ Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
+ isLibCallInTailPosition(MIRBuilder.getTII(), MI);
std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
if (!CLI.lowerCall(MIRBuilder, Info))
@@ -668,10 +695,11 @@ LegalizerHelper::libcall(MachineInstr &MI) {
case TargetOpcode::G_FMAXNUM:
case TargetOpcode::G_FSQRT:
case TargetOpcode::G_FRINT:
- case TargetOpcode::G_FNEARBYINT: {
+ case TargetOpcode::G_FNEARBYINT:
+ case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
- if (!HLTy || (Size != 32 && Size != 64 && Size != 128)) {
- LLVM_DEBUG(dbgs() << "No libcall available for size " << Size << ".\n");
+ if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
+ LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
return UnableToLegalize;
}
auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
@@ -720,6 +748,13 @@ LegalizerHelper::libcall(MachineInstr &MI) {
return Status;
break;
}
+ case TargetOpcode::G_MEMCPY:
+ case TargetOpcode::G_MEMMOVE:
+ case TargetOpcode::G_MEMSET: {
+ LegalizeResult Result = createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI);
+ MI.eraseFromParent();
+ return Result;
+ }
}
MI.eraseFromParent();
@@ -900,7 +935,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
case TargetOpcode::G_INSERT:
return narrowScalarInsert(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_LOAD: {
- const auto &MMO = **MI.memoperands_begin();
+ auto &MMO = **MI.memoperands_begin();
Register DstReg = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
if (DstTy.isVector())
@@ -908,7 +943,6 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
- auto &MMO = **MI.memoperands_begin();
MIRBuilder.buildLoad(TmpReg, MI.getOperand(1), MMO);
MIRBuilder.buildAnyExt(DstReg, TmpReg);
MI.eraseFromParent();
@@ -925,10 +959,15 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
auto &MMO = **MI.memoperands_begin();
- if (MMO.getSizeInBits() == NarrowSize) {
+ unsigned MemSize = MMO.getSizeInBits();
+
+ if (MemSize == NarrowSize) {
MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
- } else {
+ } else if (MemSize < NarrowSize) {
MIRBuilder.buildLoadInstr(MI.getOpcode(), TmpReg, PtrReg, MMO);
+ } else if (MemSize > NarrowSize) {
+ // FIXME: Need to split the load.
+ return UnableToLegalize;
}
if (ZExt)
@@ -1204,6 +1243,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_PTR_ADD:
case TargetOpcode::G_PTRMASK: {
if (TypeIdx != 1)
return UnableToLegalize;
@@ -1212,6 +1252,29 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_FPTOUI: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_FPTOSI: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_SEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_FPEXT:
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
}
}
@@ -1272,10 +1335,8 @@ void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
unsigned OpIdx) {
MachineOperand &MO = MI.getOperand(OpIdx);
- Register DstExt = MRI.createGenericVirtualRegister(WideTy);
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
- MIRBuilder.buildExtract(MO, DstExt, 0);
- MO.setReg(DstExt);
+ MO.setReg(widenWithUnmerge(WideTy, MO.getReg()));
}
void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
@@ -1443,6 +1504,40 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
return Legalized;
}
+Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) {
+ Register WideReg = MRI.createGenericVirtualRegister(WideTy);
+ LLT OrigTy = MRI.getType(OrigReg);
+ LLT LCMTy = getLCMType(WideTy, OrigTy);
+
+ const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits();
+ const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits();
+
+ Register UnmergeSrc = WideReg;
+
+ // Create a merge to the LCM type, padding with undef
+ // %0:_(<3 x s32>) = G_FOO => <4 x s32>
+ // =>
+ // %1:_(<4 x s32>) = G_FOO
+ // %2:_(<4 x s32>) = G_IMPLICIT_DEF
+ // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2
+ // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3
+ if (NumMergeParts > 1) {
+ Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0);
+ SmallVector<Register, 8> MergeParts(NumMergeParts, Undef);
+ MergeParts[0] = WideReg;
+ UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0);
+ }
+
+ // Unmerge to the original register and pad with dead defs.
+ SmallVector<Register, 8> UnmergeResults(NumUnmergeParts);
+ UnmergeResults[0] = OrigReg;
+ for (int I = 1; I != NumUnmergeParts; ++I)
+ UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy);
+
+ MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc);
+ return WideReg;
+}
+
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
LLT WideTy) {
@@ -1512,35 +1607,60 @@ LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
- // Create a sequence of unmerges to the original results. since we may have
- // widened the source, we will need to pad the results with dead defs to cover
- // the source register.
- // e.g. widen s16 to s32:
- // %1:_(s16), %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0:_(s48)
+ // Create a sequence of unmerges and merges to the original results. Since we
+ // may have widened the source, we will need to pad the results with dead defs
+ // to cover the source register.
+ // e.g. widen s48 to s64:
+ // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
//
// =>
- // %4:_(s64) = G_ANYEXT %0:_(s48)
- // %5:_(s32), %6:_(s32) = G_UNMERGE_VALUES %4 ; Requested unmerge
- // %1:_(s16), %2:_(s16) = G_UNMERGE_VALUES %5 ; unpack to original regs
- // %3:_(s16), dead %7 = G_UNMERGE_VALUES %6 ; original reg + extra dead def
-
+ // %4:_(s192) = G_ANYEXT %0:_(s96)
+ // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
+ // ; unpack to GCD type, with extra dead defs
+ // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
+ // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
+ // dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
+ // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
+ // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
+ const LLT GCDTy = getGCDType(WideTy, DstTy);
const int NumUnmerge = Unmerge->getNumOperands() - 1;
- const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
-
- for (int I = 0; I != NumUnmerge; ++I) {
- auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
-
- for (int J = 0; J != PartsPerUnmerge; ++J) {
- int Idx = I * PartsPerUnmerge + J;
- if (Idx < NumDst)
- MIB.addDef(MI.getOperand(Idx).getReg());
- else {
- // Create dead def for excess components.
- MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
+ const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
+
+ // Directly unmerge to the destination without going through a GCD type
+ // if possible
+ if (PartsPerRemerge == 1) {
+ const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
+
+ for (int I = 0; I != NumUnmerge; ++I) {
+ auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
+
+ for (int J = 0; J != PartsPerUnmerge; ++J) {
+ int Idx = I * PartsPerUnmerge + J;
+ if (Idx < NumDst)
+ MIB.addDef(MI.getOperand(Idx).getReg());
+ else {
+ // Create dead def for excess components.
+ MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
+ }
}
+
+ MIB.addUse(Unmerge.getReg(I));
}
+ } else {
+ SmallVector<Register, 16> Parts;
+ for (int J = 0; J != NumUnmerge; ++J)
+ extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
+
+ SmallVector<Register, 8> RemergeParts;
+ for (int I = 0; I != NumDst; ++I) {
+ for (int J = 0; J < PartsPerRemerge; ++J) {
+ const int Idx = I * PartsPerRemerge + J;
+ RemergeParts.emplace_back(Parts[Idx]);
+ }
- MIB.addUse(Unmerge.getReg(I));
+ MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts);
+ RemergeParts.clear();
+ }
}
MI.eraseFromParent();
@@ -1590,8 +1710,7 @@ LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
Src = MIRBuilder.buildAnyExt(WideTy, Src);
ShiftTy = WideTy;
- } else if (WideTy.getSizeInBits() > SrcTy.getSizeInBits())
- return UnableToLegalize;
+ }
auto LShr = MIRBuilder.buildLShr(
ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
@@ -1629,7 +1748,7 @@ LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
LLT WideTy) {
- if (TypeIdx != 0)
+ if (TypeIdx != 0 || WideTy.isVector())
return UnableToLegalize;
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
@@ -1639,14 +1758,45 @@ LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::widenScalarAddSubSat(MachineInstr &MI, unsigned TypeIdx,
- LLT WideTy) {
+LegalizerHelper::widenScalarAddoSubo(MachineInstr &MI, unsigned TypeIdx,
+ LLT WideTy) {
+ if (TypeIdx == 1)
+ return UnableToLegalize; // TODO
+ unsigned Op = MI.getOpcode();
+ unsigned Opcode = Op == TargetOpcode::G_UADDO || Op == TargetOpcode::G_SADDO
+ ? TargetOpcode::G_ADD
+ : TargetOpcode::G_SUB;
+ unsigned ExtOpcode =
+ Op == TargetOpcode::G_UADDO || Op == TargetOpcode::G_USUBO
+ ? TargetOpcode::G_ZEXT
+ : TargetOpcode::G_SEXT;
+ auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
+ auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
+ // Do the arithmetic in the larger type.
+ auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt});
+ LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
+ auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
+ auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
+ // There is no overflow if the ExtOp is the same as NewOp.
+ MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
+ // Now trunc the NewOp to the original result.
+ MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
+ MI.eraseFromParent();
+ return Legalized;
+}
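For reference, the overflow test above on plain integers: do the add in a wider type, truncate, re-extend, and compare. A standalone sketch for s8 G_UADDO (unsigned, so zero-extension; the signed cases would sign-extend), with a hypothetical helper name:

#include <cstdint>

static bool uaddo8ViaWiden(uint8_t A, uint8_t B, uint8_t &Res) {
  uint32_t Wide = uint32_t(A) + uint32_t(B); // G_ZEXT both sides, G_ADD in WideTy
  Res = uint8_t(Wide);                       // G_TRUNC to the original type
  return Wide != uint32_t(Res);              // overflow iff re-extension differs
}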
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
+ LLT WideTy) {
bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
- MI.getOpcode() == TargetOpcode::G_SSUBSAT;
+ MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
+ MI.getOpcode() == TargetOpcode::G_SSHLSAT;
+ bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
+ MI.getOpcode() == TargetOpcode::G_USHLSAT;
// We can convert this to:
// 1. Any extend iN to iM
// 2. SHL by M-N
- // 3. [US][ADD|SUB]SAT
+ // 3. [US][ADD|SUB|SHL]SAT
// 4. L/ASHR by M-N
//
// It may be more efficient to lower this to a min and a max operation in
@@ -1657,11 +1807,14 @@ LegalizerHelper::widenScalarAddSubSat(MachineInstr &MI, unsigned TypeIdx,
unsigned NewBits = WideTy.getScalarSizeInBits();
unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
+ // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
+ // must not left shift the RHS to preserve the shift amount.
auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
- auto RHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
+ auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
+ : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
- auto ShiftR = MIRBuilder.buildShl(WideTy, RHS, ShiftK);
+ auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
{ShiftL, ShiftR}, MI.getFlags());
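The shift-to-the-top trick can be checked on plain integers. A standalone sketch for s8 G_UADDSAT done in s16 (the 16-bit saturating add is written out by hand; names hypothetical):

#include <cstdint>

static uint8_t uaddsat8Via16(uint8_t A, uint8_t B) {
  const unsigned SHLAmount = 16 - 8;                    // M - N
  uint16_t L = uint16_t(A) << SHLAmount;                // any-ext + G_SHL
  uint16_t R = uint16_t(B) << SHLAmount;
  uint32_t Sum = uint32_t(L) + uint32_t(R);             // 16-bit G_UADDSAT,
  uint16_t Sat = Sum > 0xFFFF ? 0xFFFF : uint16_t(Sum); // spelled out by hand
  return uint8_t(Sat >> SHLAmount);                     // G_LSHR by M - N
}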
@@ -1689,34 +1842,18 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
return widenScalarMergeValues(MI, TypeIdx, WideTy);
case TargetOpcode::G_UNMERGE_VALUES:
return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_SSUBO:
case TargetOpcode::G_UADDO:
- case TargetOpcode::G_USUBO: {
- if (TypeIdx == 1)
- return UnableToLegalize; // TODO
- auto LHSZext = MIRBuilder.buildZExt(WideTy, MI.getOperand(2));
- auto RHSZext = MIRBuilder.buildZExt(WideTy, MI.getOperand(3));
- unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
- ? TargetOpcode::G_ADD
- : TargetOpcode::G_SUB;
- // Do the arithmetic in the larger type.
- auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
- LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
- APInt Mask =
- APInt::getLowBitsSet(WideTy.getSizeInBits(), OrigTy.getSizeInBits());
- auto AndOp = MIRBuilder.buildAnd(
- WideTy, NewOp, MIRBuilder.buildConstant(WideTy, Mask));
- // There is no overflow if the AndOp is the same as NewOp.
- MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, AndOp);
- // Now trunc the NewOp to the original result.
- MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
- MI.eraseFromParent();
- return Legalized;
- }
+ case TargetOpcode::G_USUBO:
+ return widenScalarAddoSubo(MI, TypeIdx, WideTy);
case TargetOpcode::G_SADDSAT:
case TargetOpcode::G_SSUBSAT:
+ case TargetOpcode::G_SSHLSAT:
case TargetOpcode::G_UADDSAT:
case TargetOpcode::G_USUBSAT:
- return widenScalarAddSubSat(MI, TypeIdx, WideTy);
+ case TargetOpcode::G_USHLSAT:
+ return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
case TargetOpcode::G_CTTZ:
case TargetOpcode::G_CTTZ_ZERO_UNDEF:
case TargetOpcode::G_CTLZ:
@@ -1908,21 +2045,25 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_SITOFP:
- if (TypeIdx != 1)
- return UnableToLegalize;
Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
+
+ if (TypeIdx == 0)
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+ else
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
+
Observer.changedInstr(MI);
return Legalized;
-
case TargetOpcode::G_UITOFP:
- if (TypeIdx != 1)
- return UnableToLegalize;
Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
+
+ if (TypeIdx == 0)
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+ else
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
+
Observer.changedInstr(MI);
return Legalized;
-
case TargetOpcode::G_LOAD:
case TargetOpcode::G_SEXTLOAD:
case TargetOpcode::G_ZEXTLOAD:
@@ -1936,7 +2077,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
return UnableToLegalize;
LLT Ty = MRI.getType(MI.getOperand(0).getReg());
- if (!isPowerOf2_32(Ty.getSizeInBits()))
+ if (!Ty.isScalar())
return UnableToLegalize;
Observer.changingInstr(MI);
@@ -2134,6 +2275,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_FPOW:
case TargetOpcode::G_INTRINSIC_TRUNC:
case TargetOpcode::G_INTRINSIC_ROUND:
+ case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
assert(TypeIdx == 0);
Observer.changingInstr(MI);
@@ -2143,6 +2285,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
Observer.changedInstr(MI);
return Legalized;
+ case TargetOpcode::G_FPOWI: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
case TargetOpcode::G_INTTOPTR:
if (TypeIdx != 1)
return UnableToLegalize;
@@ -2169,8 +2320,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
// Avoid changing the result vector type if the source element type was
// requested.
if (TypeIdx == 1) {
- auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
- MI.setDesc(TII.get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
+ MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
} else {
widenScalarDst(MI, WideTy, 0);
}
@@ -2273,6 +2423,376 @@ LegalizerHelper::lowerBitcast(MachineInstr &MI) {
return UnableToLegalize;
}
+/// Figure out the bit offset into a register when coercing a vector index for
+/// the wide element type. This is only for the case when promoting a vector to
+/// one with larger elements.
+///
+/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
+/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
+static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
+ Register Idx,
+ unsigned NewEltSize,
+ unsigned OldEltSize) {
+ const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
+ LLT IdxTy = B.getMRI()->getType(Idx);
+
+ // Now figure out the amount we need to shift to get the target bits.
+ auto OffsetMask = B.buildConstant(
+ IdxTy, ~(APInt::getAllOnesValue(IdxTy.getSizeInBits()) << Log2EltRatio));
+ auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
+ return B.buildShl(IdxTy, OffsetIdx,
+ B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
+}
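The mask in the snippet above, ~(-1 << Log2(Ratio)), is just Ratio - 1; a minimal check of the index math for s16 elements viewed as s64 (ratio 4):

#include <cassert>

int main() {
  unsigned Log2EltRatio = 2;                             // Log2(64 / 16)
  unsigned Idx = 5;                                      // index in s16 elements
  unsigned OffsetIdx = Idx & ((1u << Log2EltRatio) - 1); // index within one s64
  unsigned OffsetBits = OffsetIdx << 4;                  // << Log2(OldEltSize)
  assert(OffsetIdx == 1 && OffsetBits == 16);            // element 5: word 1, bit 16
}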
+
+/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
+/// is casting to a vector with a smaller element size, perform multiple element
+/// extracts and merge the results. If this is coercing to a vector with larger
+/// elements, index the bitcasted vector and extract the target element with bit
+/// operations. This is intended to force the indexing in the native register
+/// size for architectures that can dynamically index the register file.
+LegalizerHelper::LegalizeResult
+LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
+ LLT CastTy) {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register SrcVec = MI.getOperand(1).getReg();
+ Register Idx = MI.getOperand(2).getReg();
+ LLT SrcVecTy = MRI.getType(SrcVec);
+ LLT IdxTy = MRI.getType(Idx);
+
+ LLT SrcEltTy = SrcVecTy.getElementType();
+ unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
+ unsigned OldNumElts = SrcVecTy.getNumElements();
+
+ LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
+ Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
+
+ const unsigned NewEltSize = NewEltTy.getSizeInBits();
+ const unsigned OldEltSize = SrcEltTy.getSizeInBits();
+ if (NewNumElts > OldNumElts) {
+ // Decreasing the vector element size
+ //
+ // e.g. i64 = extract_vector_elt x:v2i64, y:i32
+ // =>
+ // v4i32:castx = bitcast x:v2i64
+ //
+ // i64 = bitcast
+ // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
+    //                       (i32 (extract_vector_elt castx, (2 * y + 1))))
+ //
+ if (NewNumElts % OldNumElts != 0)
+ return UnableToLegalize;
+
+ // Type of the intermediate result vector.
+ const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
+ LLT MidTy = LLT::scalarOrVector(NewEltsPerOldElt, NewEltTy);
+
+ auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
+
+ SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
+ auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
+
+ for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
+ auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
+ auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
+ auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
+ NewOps[I] = Elt.getReg(0);
+ }
+
+ auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
+ MIRBuilder.buildBitcast(Dst, NewVec);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ if (NewNumElts < OldNumElts) {
+ if (NewEltSize % OldEltSize != 0)
+ return UnableToLegalize;
+
+ // This only depends on powers of 2 because we use bit tricks to figure out
+ // the bit offset we need to shift to get the target element. A general
+ // expansion could emit division/multiply.
+ if (!isPowerOf2_32(NewEltSize / OldEltSize))
+ return UnableToLegalize;
+
+ // Increasing the vector element size.
+ // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
+ //
+ // =>
+ //
+ // %cast = G_BITCAST %vec
+ // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
+ // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
+ // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
+ // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
+ // %elt_bits = G_LSHR %wide_elt, %offset_bits
+ // %elt = G_TRUNC %elt_bits
+
+ const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
+ auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
+
+ // Divide to get the index in the wider element type.
+ auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
+
+ Register WideElt = CastVec;
+ if (CastTy.isVector()) {
+ WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
+ ScaledIdx).getReg(0);
+ }
+
+ // Compute the bit offset into the register of the target element.
+ Register OffsetBits = getBitcastWiderVectorElementOffset(
+ MIRBuilder, Idx, NewEltSize, OldEltSize);
+
+ // Shift the wide element to get the target element.
+ auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
+ MIRBuilder.buildTrunc(Dst, ExtractedBits);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+}
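Taken end to end, the larger-element path amounts to shifting within the containing word. A standalone sketch for a <4 x s16> held as a single s64, assuming elements are packed from the low bits up (hypothetical helper):

#include <cassert>
#include <cstdint>

static uint16_t extractS16FromS64(uint64_t Wide, unsigned Idx) {
  unsigned OffsetBits = (Idx & 3u) * 16; // offset_idx << Log2(SrcEltSize)
  return uint16_t(Wide >> OffsetBits);   // G_LSHR + G_TRUNC
}

int main() {
  uint64_t V = 0x4444333322221111ull;    // elements 0x1111, 0x2222, 0x3333, 0x4444
  assert(extractS16FromS64(V, 2) == 0x3333);
}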
+
+/// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits, while
+/// preserving the other bits in \p TargetReg.
+///
+/// (ZExt(InsertReg) << OffsetBits) |
+///   (TargetReg & ~(LowBitsMask(InsertReg.size()) << OffsetBits))
+static Register buildBitFieldInsert(MachineIRBuilder &B,
+ Register TargetReg, Register InsertReg,
+ Register OffsetBits) {
+ LLT TargetTy = B.getMRI()->getType(TargetReg);
+ LLT InsertTy = B.getMRI()->getType(InsertReg);
+ auto ZextVal = B.buildZExt(TargetTy, InsertReg);
+ auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
+
+ // Produce a bitmask of the value to insert
+ auto EltMask = B.buildConstant(
+ TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
+ InsertTy.getSizeInBits()));
+ // Shift it into position
+ auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
+ auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
+
+ // Clear out the bits in the wide element
+ auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
+
+  // The zero-extended, shifted insert value has zeros in every other bit
+  // position, so OR it into the masked wide element.
+ return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
+}
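The same insert on plain integers; a sketch for a u16 value placed into a u64 word at a bit offset (hypothetical helper):

#include <cstdint>

static uint64_t insertBitsSketch(uint64_t Target, uint16_t Insert,
                                 unsigned OffsetBits) {
  uint64_t ShiftedVal = uint64_t(Insert) << OffsetBits;  // zext + G_SHL
  uint64_t ShiftedMask = uint64_t(0xFFFF) << OffsetBits; // low-bits mask, shifted
  return (Target & ~ShiftedMask) | ShiftedVal;           // clear, then OR in
}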
+
+/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
+/// is increasing the element size, perform the indexing in the target element
+/// type, and use bit operations to insert at the element position. This is
+/// intended for architectures that can dynamically index the register file and
+/// want to force indexing in the native register size.
+LegalizerHelper::LegalizeResult
+LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
+ LLT CastTy) {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register SrcVec = MI.getOperand(1).getReg();
+ Register Val = MI.getOperand(2).getReg();
+ Register Idx = MI.getOperand(3).getReg();
+
+ LLT VecTy = MRI.getType(Dst);
+ LLT IdxTy = MRI.getType(Idx);
+
+ LLT VecEltTy = VecTy.getElementType();
+ LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
+ const unsigned NewEltSize = NewEltTy.getSizeInBits();
+ const unsigned OldEltSize = VecEltTy.getSizeInBits();
+
+ unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
+ unsigned OldNumElts = VecTy.getNumElements();
+
+ Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
+ if (NewNumElts < OldNumElts) {
+ if (NewEltSize % OldEltSize != 0)
+ return UnableToLegalize;
+
+ // This only depends on powers of 2 because we use bit tricks to figure out
+ // the bit offset we need to shift to get the target element. A general
+ // expansion could emit division/multiply.
+ if (!isPowerOf2_32(NewEltSize / OldEltSize))
+ return UnableToLegalize;
+
+ const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
+ auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
+
+ // Divide to get the index in the wider element type.
+ auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
+
+ Register ExtractedElt = CastVec;
+ if (CastTy.isVector()) {
+ ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
+ ScaledIdx).getReg(0);
+ }
+
+ // Compute the bit offset into the register of the target element.
+ Register OffsetBits = getBitcastWiderVectorElementOffset(
+ MIRBuilder, Idx, NewEltSize, OldEltSize);
+
+ Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
+ Val, OffsetBits);
+ if (CastTy.isVector()) {
+ InsertedElt = MIRBuilder.buildInsertVectorElement(
+ CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
+ }
+
+ MIRBuilder.buildBitcast(Dst, InsertedElt);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerLoad(MachineInstr &MI) {
+ // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
+ Register DstReg = MI.getOperand(0).getReg();
+ Register PtrReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ auto &MMO = **MI.memoperands_begin();
+
+ if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
+ if (MI.getOpcode() == TargetOpcode::G_LOAD) {
+ // This load needs splitting into power of 2 sized loads.
+ if (DstTy.isVector())
+ return UnableToLegalize;
+ if (isPowerOf2_32(DstTy.getSizeInBits()))
+ return UnableToLegalize; // Don't know what we're being asked to do.
+
+ // Our strategy here is to generate anyextending loads for the smaller
+ // types up to next power-2 result type, and then combine the two larger
+ // result values together, before truncating back down to the non-pow-2
+ // type.
+ // E.g. v1 = i24 load =>
+ // v2 = i32 zextload (2 byte)
+ // v3 = i32 load (1 byte)
+ // v4 = i32 shl v3, 16
+ // v5 = i32 or v4, v2
+ // v1 = i24 trunc v5
+ // By doing this we generate the correct truncate which should get
+ // combined away as an artifact with a matching extend.
+ uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
+ uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;
+
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineMemOperand *LargeMMO =
+ MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
+ MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
+ &MMO, LargeSplitSize / 8, SmallSplitSize / 8);
+
+ LLT PtrTy = MRI.getType(PtrReg);
+ unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
+ LLT AnyExtTy = LLT::scalar(AnyExtSize);
+ Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
+ Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
+ auto LargeLoad = MIRBuilder.buildLoadInstr(
+ TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO);
+
+ auto OffsetCst = MIRBuilder.buildConstant(
+ LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
+ Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
+ auto SmallPtr =
+ MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
+ auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
+ *SmallMMO);
+
+ auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
+ auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
+ auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
+ MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ if (DstTy.isScalar()) {
+ Register TmpReg =
+ MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
+ MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode");
+ case TargetOpcode::G_LOAD:
+ MIRBuilder.buildAnyExtOrTrunc(DstReg, TmpReg);
+ break;
+ case TargetOpcode::G_SEXTLOAD:
+ MIRBuilder.buildSExt(DstReg, TmpReg);
+ break;
+ case TargetOpcode::G_ZEXTLOAD:
+ MIRBuilder.buildZExt(DstReg, TmpReg);
+ break;
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+}
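The i24 strategy above behaves like this standalone sketch (little-endian byte order assumed; hypothetical helper):

#include <cstdint>
#include <cstring>

static uint32_t loadI24(const uint8_t *P) {
  uint16_t Large;
  uint8_t Small;
  std::memcpy(&Large, P, 2);     // G_ZEXTLOAD of the 2-byte piece
  std::memcpy(&Small, P + 2, 1); // G_LOAD of the 1-byte piece at offset 2
  // G_SHL by 16 and G_OR; the final G_TRUNC to s24 has no C equivalent.
  return uint32_t(Large) | (uint32_t(Small) << 16);
}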
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerStore(MachineInstr &MI) {
+ // Lower a non-power of 2 store into multiple pow-2 stores.
+ // E.g. split an i24 store into an i16 store + i8 store.
+ // We do this by first extending the stored value to the next largest power
+ // of 2 type, and then using truncating stores to store the components.
+  // As with G_LOAD, doing this generates an extend that can be combined away
+  // as an artifact instead of leaving behind extracts.
+ Register SrcReg = MI.getOperand(0).getReg();
+ Register PtrReg = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ MachineMemOperand &MMO = **MI.memoperands_begin();
+ if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
+ return UnableToLegalize;
+ if (SrcTy.isVector())
+ return UnableToLegalize;
+ if (isPowerOf2_32(SrcTy.getSizeInBits()))
+ return UnableToLegalize; // Don't know what we're being asked to do.
+
+ // Extend to the next pow-2.
+ const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
+ auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);
+
+ // Obtain the smaller value by shifting away the larger value.
+ uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
+ uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
+ auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
+ auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
+
+ // Generate the PtrAdd and truncating stores.
+ LLT PtrTy = MRI.getType(PtrReg);
+ auto OffsetCst = MIRBuilder.buildConstant(
+ LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
+ Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
+ auto SmallPtr =
+ MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
+
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineMemOperand *LargeMMO =
+ MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
+ MachineMemOperand *SmallMMO =
+ MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
+ MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
+ MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
+ MI.eraseFromParent();
+ return Legalized;
+}
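Mirroring the load case, a standalone sketch of the i24 split store (little-endian assumed; hypothetical helper):

#include <cstdint>
#include <cstring>

static void storeI24(uint8_t *P, uint32_t Val) {
  uint16_t Large = uint16_t(Val);     // truncating 2-byte store of ExtVal
  uint8_t Small = uint8_t(Val >> 16); // G_LSHR by LargeSplitSize, 1-byte store
  std::memcpy(P, &Large, 2);
  std::memcpy(P + 2, &Small, 1);
}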
+
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
switch (MI.getOpcode()) {
@@ -2321,13 +2841,24 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+ return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
+ case TargetOpcode::G_INSERT_VECTOR_ELT:
+ return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
default:
return UnableToLegalize;
}
}
+// Legalize an instruction by changing the opcode in place.
+void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
+ Observer.changingInstr(MI);
+ MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
+ Observer.changedInstr(MI);
+}
+
LegalizerHelper::LegalizeResult
-LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
using namespace TargetOpcode;
switch(MI.getOpcode()) {
@@ -2337,6 +2868,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return lowerBitcast(MI);
case TargetOpcode::G_SREM:
case TargetOpcode::G_UREM: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
auto Quot =
MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
{MI.getOperand(1), MI.getOperand(2)});
@@ -2349,6 +2881,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
case TargetOpcode::G_SADDO:
case TargetOpcode::G_SSUBO:
return lowerSADDO_SSUBO(MI);
+ case TargetOpcode::G_UMULH:
+ case TargetOpcode::G_SMULH:
+ return lowerSMULH_UMULH(MI);
case TargetOpcode::G_SMULO:
case TargetOpcode::G_UMULO: {
// Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
@@ -2357,6 +2892,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
Register Overflow = MI.getOperand(1).getReg();
Register LHS = MI.getOperand(2).getReg();
Register RHS = MI.getOperand(3).getReg();
+ LLT Ty = MRI.getType(Res);
unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
? TargetOpcode::G_SMULH
@@ -2386,31 +2922,29 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return Legalized;
}
case TargetOpcode::G_FNEG: {
+ Register Res = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Res);
+
// TODO: Handle vector types once we are able to
// represent them.
if (Ty.isVector())
return UnableToLegalize;
- Register Res = MI.getOperand(0).getReg();
- LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
- Type *ZeroTy = getFloatTypeForLLT(Ctx, Ty);
- if (!ZeroTy)
- return UnableToLegalize;
- ConstantFP &ZeroForNegation =
- *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
- auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
+ auto SignMask =
+ MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
Register SubByReg = MI.getOperand(1).getReg();
- Register ZeroReg = Zero.getReg(0);
- MIRBuilder.buildFSub(Res, ZeroReg, SubByReg, MI.getFlags());
+ MIRBuilder.buildXor(Res, SubByReg, SignMask);
MI.eraseFromParent();
return Legalized;
}
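The sign-mask form flips only the sign bit, so it is exact for zeros and NaNs and needs no FP arithmetic at all. A standalone f32 sketch (hypothetical helper):

#include <cstdint>
#include <cstring>

static float fnegViaXor(float X) {
  uint32_t Bits;
  std::memcpy(&Bits, &X, sizeof(Bits));
  Bits ^= 0x80000000u; // G_XOR with APInt::getSignMask(32)
  std::memcpy(&X, &Bits, sizeof(Bits));
  return X;
}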
case TargetOpcode::G_FSUB: {
+ Register Res = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Res);
+
// Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
// First, check if G_FNEG is marked as Lower. If so, we may
// end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
return UnableToLegalize;
- Register Res = MI.getOperand(0).getReg();
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();
Register Neg = MRI.createGenericVirtualRegister(Ty);
@@ -2425,6 +2959,12 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return lowerFFloor(MI);
case TargetOpcode::G_INTRINSIC_ROUND:
return lowerIntrinsicRound(MI);
+ case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
+ // Since round even is the assumed rounding mode for unconstrained FP
+ // operations, rint and roundeven are the same operation.
+ changeOpcode(MI, TargetOpcode::G_FRINT);
+ return Legalized;
+ }
case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
Register OldValRes = MI.getOperand(0).getReg();
Register SuccessRes = MI.getOperand(1).getReg();
@@ -2439,145 +2979,16 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
}
case TargetOpcode::G_LOAD:
case TargetOpcode::G_SEXTLOAD:
- case TargetOpcode::G_ZEXTLOAD: {
- // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
- Register DstReg = MI.getOperand(0).getReg();
- Register PtrReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- auto &MMO = **MI.memoperands_begin();
-
- if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
- if (MI.getOpcode() == TargetOpcode::G_LOAD) {
- // This load needs splitting into power of 2 sized loads.
- if (DstTy.isVector())
- return UnableToLegalize;
- if (isPowerOf2_32(DstTy.getSizeInBits()))
- return UnableToLegalize; // Don't know what we're being asked to do.
-
- // Our strategy here is to generate anyextending loads for the smaller
- // types up to next power-2 result type, and then combine the two larger
- // result values together, before truncating back down to the non-pow-2
- // type.
- // E.g. v1 = i24 load =>
- // v2 = i32 zextload (2 byte)
- // v3 = i32 load (1 byte)
- // v4 = i32 shl v3, 16
- // v5 = i32 or v4, v2
- // v1 = i24 trunc v5
- // By doing this we generate the correct truncate which should get
- // combined away as an artifact with a matching extend.
- uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
- uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;
-
- MachineFunction &MF = MIRBuilder.getMF();
- MachineMemOperand *LargeMMO =
- MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
- MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
- &MMO, LargeSplitSize / 8, SmallSplitSize / 8);
-
- LLT PtrTy = MRI.getType(PtrReg);
- unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
- LLT AnyExtTy = LLT::scalar(AnyExtSize);
- Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
- Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
- auto LargeLoad = MIRBuilder.buildLoadInstr(
- TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO);
-
- auto OffsetCst = MIRBuilder.buildConstant(
- LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
- Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
- auto SmallPtr =
- MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
- auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
- *SmallMMO);
-
- auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
- auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
- auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
- MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
- MI.eraseFromParent();
- return Legalized;
- }
- MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
- MI.eraseFromParent();
- return Legalized;
- }
-
- if (DstTy.isScalar()) {
- Register TmpReg =
- MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
- MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
- switch (MI.getOpcode()) {
- default:
- llvm_unreachable("Unexpected opcode");
- case TargetOpcode::G_LOAD:
- MIRBuilder.buildExtOrTrunc(TargetOpcode::G_ANYEXT, DstReg, TmpReg);
- break;
- case TargetOpcode::G_SEXTLOAD:
- MIRBuilder.buildSExt(DstReg, TmpReg);
- break;
- case TargetOpcode::G_ZEXTLOAD:
- MIRBuilder.buildZExt(DstReg, TmpReg);
- break;
- }
- MI.eraseFromParent();
- return Legalized;
- }
-
- return UnableToLegalize;
- }
- case TargetOpcode::G_STORE: {
- // Lower a non-power of 2 store into multiple pow-2 stores.
- // E.g. split an i24 store into an i16 store + i8 store.
- // We do this by first extending the stored value to the next largest power
- // of 2 type, and then using truncating stores to store the components.
- // By doing this, likewise with G_LOAD, generate an extend that can be
- // artifact-combined away instead of leaving behind extracts.
- Register SrcReg = MI.getOperand(0).getReg();
- Register PtrReg = MI.getOperand(1).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
- MachineMemOperand &MMO = **MI.memoperands_begin();
- if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
- return UnableToLegalize;
- if (SrcTy.isVector())
- return UnableToLegalize;
- if (isPowerOf2_32(SrcTy.getSizeInBits()))
- return UnableToLegalize; // Don't know what we're being asked to do.
-
- // Extend to the next pow-2.
- const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
- auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);
-
- // Obtain the smaller value by shifting away the larger value.
- uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
- uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
- auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
- auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
-
- // Generate the PtrAdd and truncating stores.
- LLT PtrTy = MRI.getType(PtrReg);
- auto OffsetCst = MIRBuilder.buildConstant(
- LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
- Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
- auto SmallPtr =
- MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
-
- MachineFunction &MF = MIRBuilder.getMF();
- MachineMemOperand *LargeMMO =
- MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
- MachineMemOperand *SmallMMO =
- MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
- MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
- MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
- MI.eraseFromParent();
- return Legalized;
- }
+ case TargetOpcode::G_ZEXTLOAD:
+ return lowerLoad(MI);
+ case TargetOpcode::G_STORE:
+ return lowerStore(MI);
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
case TargetOpcode::G_CTTZ_ZERO_UNDEF:
case TargetOpcode::G_CTLZ:
case TargetOpcode::G_CTTZ:
case TargetOpcode::G_CTPOP:
- return lowerBitCount(MI, TypeIdx, Ty);
+ return lowerBitCount(MI);
case G_UADDO: {
Register Res = MI.getOperand(0).getReg();
Register CarryOut = MI.getOperand(1).getReg();
@@ -2639,22 +3050,24 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return Legalized;
}
case G_UITOFP:
- return lowerUITOFP(MI, TypeIdx, Ty);
+ return lowerUITOFP(MI);
case G_SITOFP:
- return lowerSITOFP(MI, TypeIdx, Ty);
+ return lowerSITOFP(MI);
case G_FPTOUI:
- return lowerFPTOUI(MI, TypeIdx, Ty);
+ return lowerFPTOUI(MI);
case G_FPTOSI:
return lowerFPTOSI(MI);
case G_FPTRUNC:
- return lowerFPTRUNC(MI, TypeIdx, Ty);
+ return lowerFPTRUNC(MI);
+ case G_FPOWI:
+ return lowerFPOWI(MI);
case G_SMIN:
case G_SMAX:
case G_UMIN:
case G_UMAX:
- return lowerMinMax(MI, TypeIdx, Ty);
+ return lowerMinMax(MI);
case G_FCOPYSIGN:
- return lowerFCopySign(MI, TypeIdx, Ty);
+ return lowerFCopySign(MI);
case G_FMINNUM:
case G_FMAXNUM:
return lowerFMinNumMaxNum(MI);
@@ -2677,6 +3090,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
MI.eraseFromParent();
return Legalized;
}
+ case G_EXTRACT_VECTOR_ELT:
+ case G_INSERT_VECTOR_ELT:
+ return lowerExtractInsertVectorElt(MI);
case G_SHUFFLE_VECTOR:
return lowerShuffleVector(MI);
case G_DYN_STACKALLOC:
@@ -2692,33 +3108,123 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
case G_READ_REGISTER:
case G_WRITE_REGISTER:
return lowerReadWriteRegister(MI);
+ case G_UADDSAT:
+ case G_USUBSAT: {
+ // Try to make a reasonable guess about which lowering strategy to use. The
+    // target can override this with custom lowering and by calling the
+    // implementation functions directly.
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ if (LI.isLegalOrCustom({G_UMIN, Ty}))
+ return lowerAddSubSatToMinMax(MI);
+ return lowerAddSubSatToAddoSubo(MI);
+ }
+ case G_SADDSAT:
+ case G_SSUBSAT: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+
+ // FIXME: It would probably make more sense to see if G_SADDO is preferred,
+ // since it's a shorter expansion. However, we would need to figure out the
+ // preferred boolean type for the carry out for the query.
+ if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
+ return lowerAddSubSatToMinMax(MI);
+ return lowerAddSubSatToAddoSubo(MI);
+ }
+ case G_SSHLSAT:
+ case G_USHLSAT:
+ return lowerShlSat(MI);
+ case G_ABS: {
+ // Expand %res = G_ABS %a into:
+ // %v1 = G_ASHR %a, scalar_size-1
+ // %v2 = G_ADD %a, %v1
+ // %res = G_XOR %v2, %v1
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ Register OpReg = MI.getOperand(1).getReg();
+ auto ShiftAmt =
+ MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
+ auto Shift =
+ MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
+ auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
+ MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
+ MI.eraseFromParent();
+ return Legalized;
+ }
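v1 is 0 or all-ones depending on the sign, so (a + v1) ^ v1 conditionally negates a. A standalone s32 sketch, assuming an arithmetic right shift and using unsigned adds for the wrapping G_ADD:

#include <cstdint>

static int32_t absViaShift(int32_t A) {
  uint32_t V1 = uint32_t(A >> 31); // G_ASHR: 0 if A >= 0, ~0u if A < 0
  uint32_t V2 = uint32_t(A) + V1;  // G_ADD (wraps, like the generic op)
  return int32_t(V2 ^ V1);         // G_XOR; note INT32_MIN maps to itself
}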
+ case G_SELECT:
+ return lowerSelect(MI);
}
}
+Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
+ Align MinAlign) const {
+ // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
+ // datalayout for the preferred alignment. Also there should be a target hook
+ // for this to allow targets to reduce the alignment and ignore the
+ // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
+ // the type.
+ return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
+}
+
+MachineInstrBuilder
+LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
+ MachinePointerInfo &PtrInfo) {
+ MachineFunction &MF = MIRBuilder.getMF();
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
+
+ unsigned AddrSpace = DL.getAllocaAddrSpace();
+ LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
+
+ PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
+ return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
+}
+
+static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg,
+ LLT VecTy) {
+ int64_t IdxVal;
+ if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal)))
+ return IdxReg;
+
+ LLT IdxTy = B.getMRI()->getType(IdxReg);
+ unsigned NElts = VecTy.getNumElements();
+ if (isPowerOf2_32(NElts)) {
+ APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
+ return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
+ }
+
+ return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
+ .getReg(0);
+}
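A sketch of the clamp on plain unsigned integers (NElts >= 1 assumed; hypothetical helper):

#include <algorithm>

static unsigned clampIndex(unsigned Idx, unsigned NElts) {
  if ((NElts & (NElts - 1)) == 0)  // isPowerOf2_32(NElts)
    return Idx & (NElts - 1);      // G_AND with a low-bits mask
  return std::min(Idx, NElts - 1); // G_UMIN against NElts - 1
}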
+
+Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
+ Register Index) {
+ LLT EltTy = VecTy.getElementType();
+
+ // Calculate the element offset and add it to the pointer.
+ unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
+ assert(EltSize * 8 == EltTy.getSizeInBits() &&
+ "Converting bits to bytes lost precision");
+
+ Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy);
+
+ LLT IdxTy = MRI.getType(Index);
+ auto Mul = MIRBuilder.buildMul(IdxTy, Index,
+ MIRBuilder.buildConstant(IdxTy, EltSize));
+
+ LLT PtrTy = MRI.getType(VecPtr);
+ return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
+}
+
LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
- SmallVector<Register, 2> DstRegs;
-
- unsigned NarrowSize = NarrowTy.getSizeInBits();
Register DstReg = MI.getOperand(0).getReg();
- unsigned Size = MRI.getType(DstReg).getSizeInBits();
- int NumParts = Size / NarrowSize;
- // FIXME: Don't know how to handle the situation where the small vectors
- // aren't all the same size yet.
- if (Size % NarrowSize != 0)
- return UnableToLegalize;
+ LLT DstTy = MRI.getType(DstReg);
+ LLT LCMTy = getLCMType(DstTy, NarrowTy);
- for (int i = 0; i < NumParts; ++i) {
- Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
- MIRBuilder.buildUndef(TmpReg);
- DstRegs.push_back(TmpReg);
- }
+ unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
- if (NarrowTy.isVector())
- MIRBuilder.buildConcatVectors(DstReg, DstRegs);
- else
- MIRBuilder.buildBuildVector(DstReg, DstRegs);
+ auto NewUndef = MIRBuilder.buildUndef(NarrowTy);
+ SmallVector<Register, 8> Parts(NumParts, NewUndef.getReg(0));
+ buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
MI.eraseFromParent();
return Legalized;
}
@@ -2839,7 +3345,7 @@ LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
return UnableToLegalize;
- NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits());
+ NarrowTy1 = LLT::vector(NarrowTy.getNumElements(), SrcTy.getElementType());
} else {
NumParts = DstTy.getNumElements();
NarrowTy1 = SrcTy.getElementType();
@@ -3112,63 +3618,116 @@ LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
return Legalized;
}
+// Handle the FewerElementsVector action for a G_BUILD_VECTOR or
+// G_CONCAT_VECTORS that produces a vector.
+//
+// Create a G_BUILD_VECTOR or G_CONCAT_VECTORS of NarrowTy pieces, padding with
+// undef as necessary.
+//
+// %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
+// -> <2 x s16>
+//
+// %4:_(s16) = G_IMPLICIT_DEF
+// %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
+// %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
+// %7:_(<2 x s16>) = G_IMPLICIT_DEF
+// %8:_(<6 x s16>) = G_CONCAT_VECTORS %5, %6, %7
+//  %3:_(<3 x s16>), %9:_(<3 x s16>) = G_UNMERGE_VALUES %8
LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorBuildVector(MachineInstr &MI,
- unsigned TypeIdx,
- LLT NarrowTy) {
- assert(TypeIdx == 0 && "not a vector type index");
+LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
Register DstReg = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = DstTy.getElementType();
+ LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+ LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
- int DstNumElts = DstTy.getNumElements();
- int NarrowNumElts = NarrowTy.getNumElements();
- int NumConcat = (DstNumElts + NarrowNumElts - 1) / NarrowNumElts;
- LLT WidenedDstTy = LLT::vector(NarrowNumElts * NumConcat, SrcTy);
+ // Break into a common type
+ SmallVector<Register, 16> Parts;
+ for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
+ extractGCDType(Parts, GCDTy, MI.getOperand(I).getReg());
- SmallVector<Register, 8> ConcatOps;
- SmallVector<Register, 8> SubBuildVector;
+ // Build the requested new merge, padding with undef.
+ LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
+ TargetOpcode::G_ANYEXT);
- Register UndefReg;
- if (WidenedDstTy != DstTy)
- UndefReg = MIRBuilder.buildUndef(SrcTy).getReg(0);
+ // Pack into the original result register.
+ buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
- // Create a G_CONCAT_VECTORS of NarrowTy pieces, padding with undef as
- // necessary.
- //
- // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
- // -> <2 x s16>
- //
- // %4:_(s16) = G_IMPLICIT_DEF
- // %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
- // %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
- // %7:_(<4 x s16>) = G_CONCAT_VECTORS %5, %6
- // %3:_(<3 x s16>) = G_EXTRACT %7, 0
- for (int I = 0; I != NumConcat; ++I) {
- for (int J = 0; J != NarrowNumElts; ++J) {
- int SrcIdx = NarrowNumElts * I + J;
-
- if (SrcIdx < DstNumElts) {
- Register SrcReg = MI.getOperand(SrcIdx + 1).getReg();
- SubBuildVector.push_back(SrcReg);
- } else
- SubBuildVector.push_back(UndefReg);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
+ unsigned TypeIdx,
+ LLT NarrowVecTy) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcVec = MI.getOperand(1).getReg();
+ Register InsertVal;
+ bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
+
+ assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
+ if (IsInsert)
+ InsertVal = MI.getOperand(2).getReg();
+
+ Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
+
+ // TODO: Handle total scalarization case.
+ if (!NarrowVecTy.isVector())
+ return UnableToLegalize;
+
+ LLT VecTy = MRI.getType(SrcVec);
+
+ // If the index is a constant, we can really break this down as you would
+ // expect, and index into the target size pieces.
+ int64_t IdxVal;
+ if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
+ // Avoid out of bounds indexing the pieces.
+ if (IdxVal >= VecTy.getNumElements()) {
+ MIRBuilder.buildUndef(DstReg);
+ MI.eraseFromParent();
+ return Legalized;
}
- auto BuildVec = MIRBuilder.buildBuildVector(NarrowTy, SubBuildVector);
- ConcatOps.push_back(BuildVec.getReg(0));
- SubBuildVector.clear();
- }
+ SmallVector<Register, 8> VecParts;
+ LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
+
+ // Build a sequence of NarrowTy pieces in VecParts for this operand.
+ LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
+ TargetOpcode::G_ANYEXT);
+
+ unsigned NewNumElts = NarrowVecTy.getNumElements();
+
+ LLT IdxTy = MRI.getType(Idx);
+ int64_t PartIdx = IdxVal / NewNumElts;
+ auto NewIdx =
+ MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
- if (DstTy == WidenedDstTy)
- MIRBuilder.buildConcatVectors(DstReg, ConcatOps);
- else {
- auto Concat = MIRBuilder.buildConcatVectors(WidenedDstTy, ConcatOps);
- MIRBuilder.buildExtract(DstReg, Concat, 0);
+ if (IsInsert) {
+ LLT PartTy = MRI.getType(VecParts[PartIdx]);
+
+ // Use the adjusted index to insert into one of the subvectors.
+ auto InsertPart = MIRBuilder.buildInsertVectorElement(
+ PartTy, VecParts[PartIdx], InsertVal, NewIdx);
+ VecParts[PartIdx] = InsertPart.getReg(0);
+
+ // Recombine the inserted subvector with the others to reform the result
+ // vector.
+ buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
+ } else {
+ MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
}
- MI.eraseFromParent();
- return Legalized;
+ // With a variable index, we can't perform the operation in a smaller type, so
+ // we're forced to expand this.
+ //
+ // TODO: We could emit a chain of compare/select to figure out which piece to
+ // index.
+ return lowerExtractInsertVectorElt(MI);
}
LegalizerHelper::LegalizeResult
@@ -3214,7 +3773,8 @@ LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
if (NumParts == -1)
return UnableToLegalize;
- const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
+ LLT PtrTy = MRI.getType(AddrReg);
+ const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
unsigned TotalSize = ValTy.getSizeInBits();
@@ -3412,6 +3972,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_ADD:
case G_SUB:
case G_MUL:
+ case G_PTR_ADD:
case G_SMULH:
case G_UMULH:
case G_FADD:
@@ -3435,6 +3996,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_FFLOOR:
case G_FRINT:
case G_INTRINSIC_ROUND:
+ case G_INTRINSIC_ROUNDEVEN:
case G_INTRINSIC_TRUNC:
case G_FCOS:
case G_FSIN:
@@ -3466,6 +4028,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_SHL:
case G_LSHR:
case G_ASHR:
+ case G_SSHLSAT:
+ case G_USHLSAT:
case G_CTLZ:
case G_CTLZ_ZERO_UNDEF:
case G_CTTZ:
@@ -3496,7 +4060,15 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_UNMERGE_VALUES:
return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
case G_BUILD_VECTOR:
- return fewerElementsVectorBuildVector(MI, TypeIdx, NarrowTy);
+ assert(TypeIdx == 0 && "not a vector type index");
+ return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
+ case G_CONCAT_VECTORS:
+ if (TypeIdx != 1) // TODO: This probably does work as expected already.
+ return UnableToLegalize;
+ return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
+ case G_EXTRACT_VECTOR_ELT:
+ case G_INSERT_VECTOR_ELT:
+ return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
case G_LOAD:
case G_STORE:
return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
@@ -4268,9 +4840,9 @@ LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+LegalizerHelper::lowerBitCount(MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
- auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
+ const auto &TII = MIRBuilder.getTII();
auto isSupported = [this](const LegalityQuery &Q) {
auto QAction = LI.getAction(Q).Action;
return QAction == Legal || QAction == Libcall || QAction == Custom;
@@ -4358,15 +4930,15 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
// unless the target has ctlz but not ctpop, in which case we use:
// { return 32 - nlz(~x & (x-1)); }
// Ref: "Hacker's Delight" by Henry Warren
- auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
- auto MIBNot = MIRBuilder.buildXor(Ty, SrcReg, MIBCstNeg1);
+ auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
+ auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
auto MIBTmp = MIRBuilder.buildAnd(
- Ty, MIBNot, MIRBuilder.buildAdd(Ty, SrcReg, MIBCstNeg1));
- if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) &&
- isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) {
- auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
+ SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
+ if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
+ isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
+ auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
- MIRBuilder.buildCTLZ(Ty, MIBTmp));
+ MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
MI.eraseFromParent();
return Legalized;
}
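~x & (x - 1) sets exactly the bits below the lowest set bit of x, so its population count is cttz(x), including cttz(0) == 32. A sketch with GCC/Clang's __builtin_popcount standing in for G_CTPOP:

#include <cstdint>

static unsigned cttzViaCtpop(uint32_t X) {
  return unsigned(__builtin_popcount(~X & (X - 1u))); // ones below the lowest set bit
}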
@@ -4375,6 +4947,8 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return Legalized;
}
case TargetOpcode::G_CTPOP: {
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT Ty = MRI.getType(SrcReg);
unsigned Size = Ty.getSizeInBits();
MachineIRBuilder &B = MIRBuilder;
@@ -4384,11 +4958,11 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
// B2Count = val - { (val >> 1) & 0x55555555 }
// since it gives same result in blocks of 2 with one instruction less.
auto C_1 = B.buildConstant(Ty, 1);
- auto B2Set1LoTo1Hi = B.buildLShr(Ty, MI.getOperand(1).getReg(), C_1);
+ auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
- auto B2Count = B.buildSub(Ty, MI.getOperand(1).getReg(), B2Count1Hi);
+ auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
// In order to get count in blocks of 4 add values from adjacent block of 2.
// B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
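A sketch of that first step; each 2-bit block ends up holding its own popcount, matching (val & 0x55555555) + ((val >> 1) & 0x55555555) with one operation fewer:

#include <cstdint>

static uint32_t popcountBlocksOf2(uint32_t Val) {
  return Val - ((Val >> 1) & 0x55555555u); // B2Count
}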
@@ -4487,8 +5061,7 @@ LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
return Legalized;
}
-LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
LLT DstTy = MRI.getType(Dst);
@@ -4516,8 +5089,7 @@ LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return UnableToLegalize;
}
-LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
LLT DstTy = MRI.getType(Dst);
@@ -4563,8 +5135,7 @@ LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return UnableToLegalize;
}
-LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
LLT DstTy = MRI.getType(Dst);
@@ -4781,7 +5352,7 @@ LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerFPTRUNC(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
@@ -4796,6 +5367,20 @@ LegalizerHelper::lowerFPTRUNC(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return UnableToLegalize;
}
+// TODO: If RHS is a constant, SelectionDAGBuilder expands this into a
+// multiplication tree.
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src0 = MI.getOperand(1).getReg();
+ Register Src1 = MI.getOperand(2).getReg();
+ LLT Ty = MRI.getType(Dst);
+
+ auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
+ MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
+ MI.eraseFromParent();
+ return Legalized;
+}
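A sketch of the lowering at the libm level, with std::pow standing in for G_FPOW:

#include <cmath>
#include <cstdint>

static float fpowiViaFpow(float X, int32_t N) {
  return std::pow(X, float(N)); // G_SITOFP of N, then G_FPOW
}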
+
static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
switch (Opc) {
case TargetOpcode::G_SMIN:
@@ -4811,8 +5396,7 @@ static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
}
}
-LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
Register Src0 = MI.getOperand(1).getReg();
Register Src1 = MI.getOperand(2).getReg();
@@ -4828,7 +5412,7 @@ LegalizerHelper::lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
Register Src0 = MI.getOperand(1).getReg();
Register Src1 = MI.getOperand(2).getReg();
@@ -5050,6 +5634,71 @@ LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
return Legalized;
}
+/// Lower a vector extract or insert by writing the vector to a stack temporary
+/// and reloading the element or vector.
+///
+/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
+/// =>
+/// %stack_temp = G_FRAME_INDEX
+/// G_STORE %vec, %stack_temp
+/// %idx = clamp(%idx, %vec.getNumElements())
+/// %element_ptr = G_PTR_ADD %stack_temp, %idx
+/// %dst = G_LOAD %element_ptr
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcVec = MI.getOperand(1).getReg();
+ Register InsertVal;
+ if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
+ InsertVal = MI.getOperand(2).getReg();
+
+ Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
+
+ LLT VecTy = MRI.getType(SrcVec);
+ LLT EltTy = VecTy.getElementType();
+ if (!EltTy.isByteSized()) { // Not implemented.
+ LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
+ return UnableToLegalize;
+ }
+
+ unsigned EltBytes = EltTy.getSizeInBytes();
+ Align VecAlign = getStackTemporaryAlignment(VecTy);
+ Align EltAlign;
+
+ MachinePointerInfo PtrInfo;
+ auto StackTemp = createStackTemporary(TypeSize::Fixed(VecTy.getSizeInBytes()),
+ VecAlign, PtrInfo);
+ MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
+
+ // Get the pointer to the element, and be sure not to hit undefined behavior
+ // if the index is out of bounds.
+ Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
+
+ int64_t IdxVal;
+ if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
+ int64_t Offset = IdxVal * EltBytes;
+ PtrInfo = PtrInfo.getWithOffset(Offset);
+ EltAlign = commonAlignment(VecAlign, Offset);
+ } else {
+ // We lose information with a variable offset.
+ EltAlign = getStackTemporaryAlignment(EltTy);
+ PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
+ }
+
+ if (InsertVal) {
+ // Write the inserted element
+ MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
+
+ // Reload the whole vector.
+ MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
+ } else {
+ MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+}
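A standalone sketch of the extract path for a fixed <4 x s32> case, spilling to a local array; the index clamp mirrors clampDynamicVectorIndex (hypothetical helper):

#include <cstdint>
#include <cstring>

static int32_t extractEltViaStack(const int32_t (&Vec)[4], unsigned Idx) {
  int32_t Stack[4];                       // G_FRAME_INDEX stack temporary
  std::memcpy(Stack, Vec, sizeof(Stack)); // G_STORE %vec, %stack_temp
  Idx &= 3u;                              // clamp so any Idx stays in bounds
  return Stack[Idx];                      // G_LOAD through %element_ptr
}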
+
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
Register DstReg = MI.getOperand(0).getReg();
@@ -5120,7 +5769,6 @@ LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
LLT PtrTy = MRI.getType(Dst);
LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
- const auto &TLI = *MF.getSubtarget().getTargetLowering();
Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
@@ -5266,6 +5914,185 @@ LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
}
LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
+ Register Res = MI.getOperand(0).getReg();
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ LLT Ty = MRI.getType(Res);
+ bool IsSigned;
+ bool IsAdd;
+ unsigned BaseOp;
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("unexpected addsat/subsat opcode");
+ case TargetOpcode::G_UADDSAT:
+ IsSigned = false;
+ IsAdd = true;
+ BaseOp = TargetOpcode::G_ADD;
+ break;
+ case TargetOpcode::G_SADDSAT:
+ IsSigned = true;
+ IsAdd = true;
+ BaseOp = TargetOpcode::G_ADD;
+ break;
+ case TargetOpcode::G_USUBSAT:
+ IsSigned = false;
+ IsAdd = false;
+ BaseOp = TargetOpcode::G_SUB;
+ break;
+ case TargetOpcode::G_SSUBSAT:
+ IsSigned = true;
+ IsAdd = false;
+ BaseOp = TargetOpcode::G_SUB;
+ break;
+ }
+
+ if (IsSigned) {
+ // sadd.sat(a, b) ->
+ // hi = 0x7fffffff - smax(a, 0)
+ // lo = 0x80000000 - smin(a, 0)
+ // a + smin(smax(lo, b), hi)
+ // ssub.sat(a, b) ->
+ // lo = smax(a, -1) - 0x7fffffff
+ // hi = smin(a, -1) - 0x80000000
+ // a - smin(smax(lo, b), hi)
+ // TODO: AMDGPU can use a "median of 3" instruction here:
+ // a +/- med3(lo, b, hi)
+ uint64_t NumBits = Ty.getScalarSizeInBits();
+ auto MaxVal =
+ MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
+ auto MinVal =
+ MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
+ MachineInstrBuilder Hi, Lo;
+ if (IsAdd) {
+ auto Zero = MIRBuilder.buildConstant(Ty, 0);
+ Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
+ Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
+ } else {
+ auto NegOne = MIRBuilder.buildConstant(Ty, -1);
+ Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
+ MaxVal);
+ Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
+ MinVal);
+ }
+ auto RHSClamped =
+ MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
+ MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
+ } else {
+ // uadd.sat(a, b) -> a + umin(~a, b)
+ // usub.sat(a, b) -> a - umin(a, b)
+ Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
+ auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
+ MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+}
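
To see the signed clamp formulas in action, here is an editor's sketch specialized to i8 (computed in a wider type to sidestep C++ overflow rules; the generated G-MIR does the same arithmetic in the original width, where the clamp makes the final add safe):

#include <algorithm>
#include <cassert>
#include <cstdint>

int8_t sadd_sat_i8(int8_t A, int8_t B) {
  int Hi = 127 - std::max<int>(A, 0);  // 0x7f - smax(a, 0)
  int Lo = -128 - std::min<int>(A, 0); // 0x80 - smin(a, 0)
  int Clamped = std::min(std::max<int>(Lo, B), Hi);
  return static_cast<int8_t>(A + Clamped); // cannot overflow i8 now
}

int main() {
  assert(sadd_sat_i8(100, 100) == 127);    // Hi = 27, B clamps to 27
  assert(sadd_sat_i8(-100, -100) == -128); // Lo = -28, B clamps to -28
}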
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
+ Register Res = MI.getOperand(0).getReg();
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ LLT Ty = MRI.getType(Res);
+ LLT BoolTy = Ty.changeElementSize(1);
+ bool IsSigned;
+ bool IsAdd;
+ unsigned OverflowOp;
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("unexpected addsat/subsat opcode");
+ case TargetOpcode::G_UADDSAT:
+ IsSigned = false;
+ IsAdd = true;
+ OverflowOp = TargetOpcode::G_UADDO;
+ break;
+ case TargetOpcode::G_SADDSAT:
+ IsSigned = true;
+ IsAdd = true;
+ OverflowOp = TargetOpcode::G_SADDO;
+ break;
+ case TargetOpcode::G_USUBSAT:
+ IsSigned = false;
+ IsAdd = false;
+ OverflowOp = TargetOpcode::G_USUBO;
+ break;
+ case TargetOpcode::G_SSUBSAT:
+ IsSigned = true;
+ IsAdd = false;
+ OverflowOp = TargetOpcode::G_SSUBO;
+ break;
+ }
+
+ auto OverflowRes =
+ MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
+ Register Tmp = OverflowRes.getReg(0);
+ Register Ov = OverflowRes.getReg(1);
+ MachineInstrBuilder Clamp;
+ if (IsSigned) {
+ // sadd.sat(a, b) ->
+ // {tmp, ov} = saddo(a, b)
+ // ov ? (tmp >>s 31) + 0x80000000 : tmp
+ // ssub.sat(a, b) ->
+ // {tmp, ov} = ssubo(a, b)
+ // ov ? (tmp >>s 31) + 0x80000000 : tmp
+ uint64_t NumBits = Ty.getScalarSizeInBits();
+ auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
+ auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
+ auto MinVal =
+ MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
+ Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
+ } else {
+ // uadd.sat(a, b) ->
+ // {tmp, ov} = uaddo(a, b)
+ // ov ? 0xffffffff : tmp
+ // usub.sat(a, b) ->
+ // {tmp, ov} = usubo(a, b)
+ // ov ? 0 : tmp
+ Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
+ }
+ MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
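
The overflow-op form above maps naturally onto the GCC/Clang checked-arithmetic builtins; an i32 sketch under that assumption (editor's illustration, not part of the patch):

#include <cassert>
#include <cstdint>

uint32_t uadd_sat_u32(uint32_t A, uint32_t B) {
  uint32_t Tmp;
  bool Ov = __builtin_add_overflow(A, B, &Tmp); // {tmp, ov} = uaddo(a, b)
  return Ov ? 0xffffffffu : Tmp;                // clamp to all-ones
}

int32_t sadd_sat_i32(int32_t A, int32_t B) {
  int32_t Tmp;
  if (!__builtin_add_overflow(A, B, &Tmp))      // {tmp, ov} = saddo(a, b)
    return Tmp;
  // (tmp >>s 31) + 0x80000000: positive overflow wraps negative, so the
  // sign bit of the wrapped sum selects INT32_MAX or INT32_MIN.
  return static_cast<int32_t>(static_cast<uint32_t>(Tmp >> 31) + 0x80000000u);
}

int main() {
  assert(uadd_sat_u32(0xfffffff0u, 0x20u) == 0xffffffffu);
  assert(sadd_sat_i32(0x7fffffff, 1) == 0x7fffffff);
}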
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerShlSat(MachineInstr &MI) {
+ assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
+ MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
+ "Expected shlsat opcode!");
+ bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
+ Register Res = MI.getOperand(0).getReg();
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ LLT Ty = MRI.getType(Res);
+ LLT BoolTy = Ty.changeElementSize(1);
+
+ unsigned BW = Ty.getScalarSizeInBits();
+ auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
+ auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
+ : MIRBuilder.buildLShr(Ty, Result, RHS);
+
+ MachineInstrBuilder SatVal;
+ if (IsSigned) {
+ auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
+ auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
+ auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
+ MIRBuilder.buildConstant(Ty, 0));
+ SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
+ } else {
+ SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
+ }
+ auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
+ MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
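
A sketch of the shift-and-compare idea above for unsigned i8 (editor's illustration; shift amounts are assumed in range, since oversized shifts are poison in G-MIR):

#include <cassert>
#include <cstdint>

uint8_t ushl_sat_u8(uint8_t X, unsigned Amt) {
  uint8_t Res = static_cast<uint8_t>(X << Amt);    // G_SHL
  uint8_t Orig = static_cast<uint8_t>(Res >> Amt); // shift back (G_LSHR)
  return Orig != X ? 0xff : Res; // bits lost -> saturate to all-ones
}

int main() {
  assert(ushl_sat_u8(0x30, 3) == 0xff); // 0x180 truncates; bits lost
  assert(ushl_sat_u8(0x03, 2) == 0x0c); // exact; no saturation
}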
+
+LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBswap(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
@@ -5345,8 +6172,6 @@ LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
MachineFunction &MF = MIRBuilder.getMF();
- const TargetSubtargetInfo &STI = MF.getSubtarget();
- const TargetLowering *TLI = STI.getTargetLowering();
bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
int NameOpIdx = IsRead ? 1 : 0;
@@ -5357,7 +6182,7 @@ LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
const MDString *RegStr = cast<MDString>(
cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
- Register PhysReg = TLI->getRegisterByName(RegStr->getString().data(), Ty, MF);
+ Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
if (!PhysReg.isValid())
return UnableToLegalize;
@@ -5369,3 +6194,63 @@ LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
MI.eraseFromParent();
return Legalized;
}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
+ bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
+ unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
+ Register Result = MI.getOperand(0).getReg();
+ LLT OrigTy = MRI.getType(Result);
+ auto SizeInBits = OrigTy.getScalarSizeInBits();
+ LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
+
+ auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
+ auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
+ auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
+ unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
+
+ auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
+ auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
+ MIRBuilder.buildTrunc(Result, Shifted);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
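
The mulh lowering above is the familiar widen-multiply-shift trick; an i32 sketch for orientation:

#include <cassert>
#include <cstdint>

int32_t smulh_i32(int32_t A, int32_t B) {
  int64_t Wide = static_cast<int64_t>(A) * static_cast<int64_t>(B); // sext + mul
  return static_cast<int32_t>(Wide >> 32);                          // high half
}

int main() {
  assert(smulh_i32(1 << 30, 4) == 1); // 2^32 has a high half of 1
  assert(smulh_i32(-1, 1) == -1);     // high half of -1 is all-ones
}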
+
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
+ // Implement vector G_SELECT in terms of XOR, AND, OR.
+ Register DstReg = MI.getOperand(0).getReg();
+ Register MaskReg = MI.getOperand(1).getReg();
+ Register Op1Reg = MI.getOperand(2).getReg();
+ Register Op2Reg = MI.getOperand(3).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT MaskTy = MRI.getType(MaskReg);
+ LLT Op1Ty = MRI.getType(Op1Reg);
+ if (!DstTy.isVector())
+ return UnableToLegalize;
+
+ // Vector selects can have a scalar predicate. If so, splat it into a
+ // vector and return, so that later legalization attempts can try again.
+ if (MaskTy.isScalar()) {
+ Register MaskElt = MaskReg;
+ if (MaskTy.getSizeInBits() < DstTy.getScalarSizeInBits())
+ MaskElt = MIRBuilder.buildSExt(DstTy.getElementType(), MaskElt).getReg(0);
+ // Generate a vector splat idiom to be pattern matched later.
+ auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(ShufSplat.getReg(0));
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits())
+ return UnableToLegalize;
+
+ auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
+ auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
+ auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
+ MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
+ MI.eraseFromParent();
+ return Legalized;
+}
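
Per lane, the XOR/AND/OR lowering above reduces to classic bit-twiddling; a sketch assuming each mask lane is all-zeros or all-ones, which is what the sign-extended splat guarantees:

#include <cassert>
#include <cstdint>

uint32_t select_lane(uint32_t Mask, uint32_t Op1, uint32_t Op2) {
  return (Op1 & Mask) | (Op2 & ~Mask); // ~Mask is the G_XOR-based NOT
}

int main() {
  assert(select_lane(0xffffffffu, 7, 9) == 7);
  assert(select_lane(0x00000000u, 7, 9) == 9);
}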
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index 4abd0c4df97a..30acac14bc5f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -105,6 +105,7 @@ raw_ostream &LegalityQuery::print(raw_ostream &OS) const {
static bool hasNoSimpleLoops(const LegalizeRule &Rule, const LegalityQuery &Q,
const std::pair<unsigned, LLT> &Mutation) {
switch (Rule.getAction()) {
+ case Legal:
case Custom:
case Lower:
case MoreElements:
@@ -122,7 +123,7 @@ static bool mutationIsSane(const LegalizeRule &Rule,
std::pair<unsigned, LLT> Mutation) {
// If the user wants a custom mutation, then we can't really say much about
// it. Return true, and trust that they're doing the right thing.
- if (Rule.getAction() == Custom)
+ if (Rule.getAction() == Custom || Rule.getAction() == Legal)
return true;
const unsigned TypeIdx = Mutation.first;
@@ -147,7 +148,8 @@ static bool mutationIsSane(const LegalizeRule &Rule,
if (NewTy.getNumElements() <= OldElts)
return false;
}
- }
+ } else if (Rule.getAction() == MoreElements)
+ return false;
// Make sure the element type didn't change.
return NewTy.getScalarType() == OldTy.getScalarType();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
index a07416d08614..30c00c63f6f4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -11,6 +11,7 @@
#include "llvm/CodeGen/GlobalISel/Localizer.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -56,6 +57,20 @@ bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def,
return InsertMBB == Def.getParent();
}
+bool Localizer::isNonUniquePhiValue(MachineOperand &Op) const {
+ MachineInstr *MI = Op.getParent();
+ if (!MI->isPHI())
+ return false;
+
+ Register SrcReg = Op.getReg();
+ for (unsigned Idx = 1; Idx < MI->getNumOperands(); Idx += 2) {
+ auto &MO = MI->getOperand(Idx);
+ if (&MO != &Op && MO.isReg() && MO.getReg() == SrcReg)
+ return true;
+ }
+ return false;
+}
+
bool Localizer::localizeInterBlock(MachineFunction &MF,
LocalizedSetVecT &LocalizedInstrs) {
bool Changed = false;
@@ -93,6 +108,14 @@ bool Localizer::localizeInterBlock(MachineFunction &MF,
LocalizedInstrs.insert(&MI);
continue;
}
+
+ // If the use is a phi operand that's not unique, don't try to localize.
+ // If we do, we can cause unnecessary instruction bloat by duplicating
+ // into each predecessor block, when the existing one is sufficient and
+ // allows for easier optimization later.
+ if (isNonUniquePhiValue(MOUse))
+ continue;
+
LLVM_DEBUG(dbgs() << "Fixing non-local use\n");
Changed = true;
auto MBBAndReg = std::make_pair(InsertMBB, Reg);
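
To illustrate the new phi-operand check (hypothetical MIR, not taken from the patch):

// %c:_(s32) = G_CONSTANT i32 5
// ...
// %p:_(s32) = G_PHI %c(s32), %bb.1, %c(s32), %bb.2
//
// Localizing %c would duplicate the constant into both %bb.1 and %bb.2
// for no benefit; keeping the single definition also leaves the phi
// easier to fold later, which is exactly what the skip above preserves.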
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 10f696d6a3b3..67ef02a4e7b2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -9,8 +9,8 @@
/// This file implements the MachineIRBuilder class.
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
-
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -106,8 +106,8 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
} else if (auto *CFP = dyn_cast<ConstantFP>(&C)) {
MIB.addFPImm(CFP);
} else {
- // Insert %noreg if we didn't find a usable constant and had to drop it.
- MIB.addReg(0U);
+ // Insert $noreg if we didn't find a usable constant and had to drop it.
+ MIB.addReg(Register());
}
MIB.addImm(0).addMetadata(Variable).addMetadata(Expr);
@@ -162,6 +162,11 @@ MachineInstrBuilder MachineIRBuilder::buildJumpTable(const LLT PtrTy,
.addJumpTableIndex(JTI);
}
+void MachineIRBuilder::validateUnaryOp(const LLT Res, const LLT Op0) {
+ assert((Res.isScalar() || Res.isVector()) && "invalid operand type");
+ assert((Res == Op0) && "type mismatch");
+}
+
void MachineIRBuilder::validateBinaryOp(const LLT Res, const LLT Op0,
const LLT Op1) {
assert((Res.isScalar() || Res.isVector()) && "invalid operand type");
@@ -312,17 +317,29 @@ MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
return buildFConstant(Res, *CFP);
}
-MachineInstrBuilder MachineIRBuilder::buildBrCond(Register Tst,
+MachineInstrBuilder MachineIRBuilder::buildBrCond(const SrcOp &Tst,
MachineBasicBlock &Dest) {
- assert(getMRI()->getType(Tst).isScalar() && "invalid operand type");
+ assert(Tst.getLLTTy(*getMRI()).isScalar() && "invalid operand type");
- return buildInstr(TargetOpcode::G_BRCOND).addUse(Tst).addMBB(&Dest);
+ auto MIB = buildInstr(TargetOpcode::G_BRCOND);
+ Tst.addSrcToMIB(MIB);
+ MIB.addMBB(&Dest);
+ return MIB;
}
-MachineInstrBuilder MachineIRBuilder::buildLoad(const DstOp &Res,
- const SrcOp &Addr,
- MachineMemOperand &MMO) {
- return buildLoadInstr(TargetOpcode::G_LOAD, Res, Addr, MMO);
+MachineInstrBuilder
+MachineIRBuilder::buildLoad(const DstOp &Dst, const SrcOp &Addr,
+ MachinePointerInfo PtrInfo, Align Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo) {
+ MMOFlags |= MachineMemOperand::MOLoad;
+ assert((MMOFlags & MachineMemOperand::MOStore) == 0);
+
+ uint64_t Size = MemoryLocation::getSizeOrUnknown(
+ TypeSize::Fixed(Dst.getLLTTy(*getMRI()).getSizeInBytes()));
+ MachineMemOperand *MMO =
+ getMF().getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo);
+ return buildLoad(Dst, Addr, *MMO);
}
MachineInstrBuilder MachineIRBuilder::buildLoadInstr(unsigned Opcode,
@@ -369,6 +386,21 @@ MachineInstrBuilder MachineIRBuilder::buildStore(const SrcOp &Val,
return MIB;
}
+MachineInstrBuilder
+MachineIRBuilder::buildStore(const SrcOp &Val, const SrcOp &Addr,
+ MachinePointerInfo PtrInfo, Align Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo) {
+ MMOFlags |= MachineMemOperand::MOStore;
+ assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
+
+ uint64_t Size = MemoryLocation::getSizeOrUnknown(
+ TypeSize::Fixed(Val.getLLTTy(*getMRI()).getSizeInBytes()));
+ MachineMemOperand *MMO =
+ getMF().getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo);
+ return buildStore(Val, Addr, *MMO);
+}
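
A usage sketch for the two new overloads (hypothetical registers and pointer info; the MachineMemOperand is created internally from the MachinePointerInfo, alignment, and the size inferred from the value type):

// MIRBuilder.buildStore(Val, Addr, PtrInfo, Align(4));
// auto Ld = MIRBuilder.buildLoad(S32Ty, Addr, PtrInfo, Align(4));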
+
MachineInstrBuilder MachineIRBuilder::buildAnyExt(const DstOp &Res,
const SrcOp &Op) {
return buildInstr(TargetOpcode::G_ANYEXT, Res, Op);
@@ -603,6 +635,35 @@ MachineIRBuilder::buildBuildVectorTrunc(const DstOp &Res,
return buildInstr(TargetOpcode::G_BUILD_VECTOR_TRUNC, Res, TmpVec);
}
+MachineInstrBuilder MachineIRBuilder::buildShuffleSplat(const DstOp &Res,
+ const SrcOp &Src) {
+ LLT DstTy = Res.getLLTTy(*getMRI());
+ assert(Src.getLLTTy(*getMRI()) == DstTy.getElementType() &&
+ "Expected Src to match Dst elt ty");
+ auto UndefVec = buildUndef(DstTy);
+ auto Zero = buildConstant(LLT::scalar(64), 0);
+ auto InsElt = buildInsertVectorElement(DstTy, UndefVec, Src, Zero);
+ SmallVector<int, 16> ZeroMask(DstTy.getNumElements());
+ return buildShuffleVector(DstTy, InsElt, UndefVec, ZeroMask);
+}
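
The generic-MIR shape produced by buildShuffleSplat, with hypothetical virtual registers:

// %undef:_(<4 x s32>) = G_IMPLICIT_DEF
// %zero:_(s64)        = G_CONSTANT i64 0
// %ins:_(<4 x s32>)   = G_INSERT_VECTOR_ELT %undef, %src(s32), %zero(s64)
// %splat:_(<4 x s32>) = G_SHUFFLE_VECTOR %ins, %undef, shufflemask(0,0,0,0)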
+
+MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res,
+ const SrcOp &Src1,
+ const SrcOp &Src2,
+ ArrayRef<int> Mask) {
+ LLT DstTy = Res.getLLTTy(*getMRI());
+ LLT Src1Ty = Src1.getLLTTy(*getMRI());
+ LLT Src2Ty = Src2.getLLTTy(*getMRI());
+ assert(Src1Ty.getNumElements() + Src2Ty.getNumElements() >= Mask.size());
+ assert(DstTy.getElementType() == Src1Ty.getElementType() &&
+ DstTy.getElementType() == Src2Ty.getElementType());
+ (void)Src1Ty;
+ (void)Src2Ty;
+ ArrayRef<int> MaskAlloc = getMF().allocateShuffleMask(Mask);
+ return buildInstr(TargetOpcode::G_SHUFFLE_VECTOR, {DstTy}, {Src1, Src2})
+ .addShuffleMask(MaskAlloc);
+}
+
MachineInstrBuilder
MachineIRBuilder::buildConcatVectors(const DstOp &Res, ArrayRef<Register> Ops) {
// Unfortunately to convert from ArrayRef<Register> to ArrayRef<SrcOp>,
@@ -925,6 +986,14 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
SrcOps[1].getLLTTy(*getMRI()), SrcOps[2].getLLTTy(*getMRI()));
break;
}
+ case TargetOpcode::G_FNEG:
+ case TargetOpcode::G_ABS:
+ // All these are unary ops.
+ assert(DstOps.size() == 1 && "Invalid Dst");
+ assert(SrcOps.size() == 1 && "Invalid Srcs");
+ validateUnaryOp(DstOps[0].getLLTTy(*getMRI()),
+ SrcOps[0].getLLTTy(*getMRI()));
+ break;
case TargetOpcode::G_ADD:
case TargetOpcode::G_AND:
case TargetOpcode::G_MUL:
@@ -953,7 +1022,9 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
}
case TargetOpcode::G_SHL:
case TargetOpcode::G_ASHR:
- case TargetOpcode::G_LSHR: {
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_USHLSAT:
+ case TargetOpcode::G_SSHLSAT: {
assert(DstOps.size() == 1 && "Invalid Dst");
assert(SrcOps.size() == 2 && "Invalid Srcs");
validateShiftOp(DstOps[0].getLLTTy(*getMRI()),
@@ -1018,11 +1089,11 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
case TargetOpcode::G_UNMERGE_VALUES: {
assert(!DstOps.empty() && "Invalid trivial sequence");
assert(SrcOps.size() == 1 && "Invalid src for Unmerge");
- assert(std::all_of(DstOps.begin(), DstOps.end(),
- [&, this](const DstOp &Op) {
- return Op.getLLTTy(*getMRI()) ==
- DstOps[0].getLLTTy(*getMRI());
- }) &&
+ assert(llvm::all_of(DstOps,
+ [&, this](const DstOp &Op) {
+ return Op.getLLTTy(*getMRI()) ==
+ DstOps[0].getLLTTy(*getMRI());
+ }) &&
"type mismatch in output list");
assert(DstOps.size() * DstOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
@@ -1032,11 +1103,11 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
case TargetOpcode::G_MERGE_VALUES: {
assert(!SrcOps.empty() && "invalid trivial sequence");
assert(DstOps.size() == 1 && "Invalid Dst");
- assert(std::all_of(SrcOps.begin(), SrcOps.end(),
- [&, this](const SrcOp &Op) {
- return Op.getLLTTy(*getMRI()) ==
- SrcOps[0].getLLTTy(*getMRI());
- }) &&
+ assert(llvm::all_of(SrcOps,
+ [&, this](const SrcOp &Op) {
+ return Op.getLLTTy(*getMRI()) ==
+ SrcOps[0].getLLTTy(*getMRI());
+ }) &&
"type mismatch in input list");
assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
DstOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
@@ -1083,11 +1154,11 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
assert(DstOps.size() == 1 && "Invalid DstOps");
assert(DstOps[0].getLLTTy(*getMRI()).isVector() &&
"Res type must be a vector");
- assert(std::all_of(SrcOps.begin(), SrcOps.end(),
- [&, this](const SrcOp &Op) {
- return Op.getLLTTy(*getMRI()) ==
- SrcOps[0].getLLTTy(*getMRI());
- }) &&
+ assert(llvm::all_of(SrcOps,
+ [&, this](const SrcOp &Op) {
+ return Op.getLLTTy(*getMRI()) ==
+ SrcOps[0].getLLTTy(*getMRI());
+ }) &&
"type mismatch in input list");
assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
DstOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
@@ -1100,11 +1171,11 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
assert(DstOps.size() == 1 && "Invalid DstOps");
assert(DstOps[0].getLLTTy(*getMRI()).isVector() &&
"Res type must be a vector");
- assert(std::all_of(SrcOps.begin(), SrcOps.end(),
- [&, this](const SrcOp &Op) {
- return Op.getLLTTy(*getMRI()) ==
- SrcOps[0].getLLTTy(*getMRI());
- }) &&
+ assert(llvm::all_of(SrcOps,
+ [&, this](const SrcOp &Op) {
+ return Op.getLLTTy(*getMRI()) ==
+ SrcOps[0].getLLTTy(*getMRI());
+ }) &&
"type mismatch in input list");
if (SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
DstOps[0].getLLTTy(*getMRI()).getElementType().getSizeInBits())
@@ -1115,12 +1186,12 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
assert(DstOps.size() == 1 && "Invalid DstOps");
assert((!SrcOps.empty() || SrcOps.size() < 2) &&
"Must have at least 2 operands");
- assert(std::all_of(SrcOps.begin(), SrcOps.end(),
- [&, this](const SrcOp &Op) {
- return (Op.getLLTTy(*getMRI()).isVector() &&
- Op.getLLTTy(*getMRI()) ==
- SrcOps[0].getLLTTy(*getMRI()));
- }) &&
+ assert(llvm::all_of(SrcOps,
+ [&, this](const SrcOp &Op) {
+ return (Op.getLLTTy(*getMRI()).isVector() &&
+ Op.getLLTTy(*getMRI()) ==
+ SrcOps[0].getLLTTy(*getMRI()));
+ }) &&
"type mismatch in input list");
assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
DstOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index 255ea693b5c4..e2a963747101 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -421,8 +421,7 @@ RegisterBankInfo::getInstrPossibleMappings(const MachineInstr &MI) const {
// Then the alternative mapping, if any.
InstructionMappings AltMappings = getInstrAlternativeMappings(MI);
- for (const InstructionMapping *AltMapping : AltMappings)
- PossibleMappings.push_back(AltMapping);
+ append_range(PossibleMappings, AltMappings);
#ifndef NDEBUG
for (const InstructionMapping *Mapping : PossibleMappings)
assert(Mapping->verify(MI) && "Mapping is invalid");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 8a7fb4fbbf2d..cd2483224489 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -11,8 +11,11 @@
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/ADT/APFloat.h"
-#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -20,13 +23,16 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Constants.h"
+#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "globalisel-utils"
using namespace llvm;
+using namespace MIPatternMatch;
Register llvm::constrainRegToClass(MachineRegisterInfo &MRI,
const TargetInstrInfo &TII,
@@ -42,7 +48,7 @@ Register llvm::constrainOperandRegClass(
const MachineFunction &MF, const TargetRegisterInfo &TRI,
MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
const RegisterBankInfo &RBI, MachineInstr &InsertPt,
- const TargetRegisterClass &RegClass, const MachineOperand &RegMO) {
+ const TargetRegisterClass &RegClass, MachineOperand &RegMO) {
Register Reg = RegMO.getReg();
// Assume physical registers are properly constrained.
assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented");
@@ -63,6 +69,13 @@ Register llvm::constrainOperandRegClass(
TII.get(TargetOpcode::COPY), Reg)
.addReg(ConstrainedReg);
}
+ if (GISelChangeObserver *Observer = MF.getObserver()) {
+ Observer->changingInstr(*RegMO.getParent());
+ }
+ RegMO.setReg(ConstrainedReg);
+ if (GISelChangeObserver *Observer = MF.getObserver()) {
+ Observer->changedInstr(*RegMO.getParent());
+ }
} else {
if (GISelChangeObserver *Observer = MF.getObserver()) {
if (!RegMO.isDef()) {
@@ -80,7 +93,7 @@ Register llvm::constrainOperandRegClass(
const MachineFunction &MF, const TargetRegisterInfo &TRI,
MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
const RegisterBankInfo &RBI, MachineInstr &InsertPt, const MCInstrDesc &II,
- const MachineOperand &RegMO, unsigned OpIdx) {
+ MachineOperand &RegMO, unsigned OpIdx) {
Register Reg = RegMO.getReg();
// Assume physical registers are properly constrained.
assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented");
@@ -150,8 +163,7 @@ bool llvm::constrainSelectedInstRegOperands(MachineInstr &I,
// If the operand is a vreg, we should constrain its regclass, and only
// insert COPYs if that's impossible.
// constrainOperandRegClass does that for us.
- MO.setReg(constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(),
- MO, OpI));
+ constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(), MO, OpI);
// Tie uses to defs as indicated in MCInstrDesc if this hasn't already been
// done.
@@ -180,6 +192,14 @@ bool llvm::canReplaceReg(Register DstReg, Register SrcReg,
bool llvm::isTriviallyDead(const MachineInstr &MI,
const MachineRegisterInfo &MRI) {
+ // FIXME: This logic is mostly duplicated with
+ // DeadMachineInstructionElim::isDead. Why is LOCAL_ESCAPE not considered in
+ // MachineInstr::isLabel?
+
+ // Don't delete frame allocation labels.
+ if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE)
+ return false;
+
// If we can move an instruction, we can remove it. Otherwise, it has
// a side-effect of some sort.
bool SawStore = false;
@@ -242,8 +262,8 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
reportGISelFailure(MF, TPC, MORE, R);
}
-Optional<int64_t> llvm::getConstantVRegVal(Register VReg,
- const MachineRegisterInfo &MRI) {
+Optional<APInt> llvm::getConstantVRegVal(Register VReg,
+ const MachineRegisterInfo &MRI) {
Optional<ValueAndVReg> ValAndVReg =
getConstantVRegValWithLookThrough(VReg, MRI, /*LookThroughInstrs*/ false);
assert((!ValAndVReg || ValAndVReg->VReg == VReg) &&
@@ -253,9 +273,17 @@ Optional<int64_t> llvm::getConstantVRegVal(Register VReg,
return ValAndVReg->Value;
}
+Optional<int64_t> llvm::getConstantVRegSExtVal(Register VReg,
+ const MachineRegisterInfo &MRI) {
+ Optional<APInt> Val = getConstantVRegVal(VReg, MRI);
+ if (Val && Val->getBitWidth() <= 64)
+ return Val->getSExtValue();
+ return None;
+}
+
Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs,
- bool HandleFConstant) {
+ bool HandleFConstant, bool LookThroughAnyExt) {
SmallVector<std::pair<unsigned, unsigned>, 4> SeenOpcodes;
MachineInstr *MI;
auto IsConstantOpcode = [HandleFConstant](unsigned Opcode) {
@@ -282,6 +310,10 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
while ((MI = MRI.getVRegDef(VReg)) && !IsConstantOpcode(MI->getOpcode()) &&
LookThroughInstrs) {
switch (MI->getOpcode()) {
+ case TargetOpcode::G_ANYEXT:
+ if (!LookThroughAnyExt)
+ return None;
+ LLVM_FALLTHROUGH;
case TargetOpcode::G_TRUNC:
case TargetOpcode::G_SEXT:
case TargetOpcode::G_ZEXT:
@@ -315,6 +347,7 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
case TargetOpcode::G_TRUNC:
Val = Val.trunc(OpcodeAndSize.second);
break;
+ case TargetOpcode::G_ANYEXT:
case TargetOpcode::G_SEXT:
Val = Val.sext(OpcodeAndSize.second);
break;
@@ -324,13 +357,10 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
}
}
- if (Val.getBitWidth() > 64)
- return None;
-
- return ValueAndVReg{Val.getSExtValue(), VReg};
+ return ValueAndVReg{Val, VReg};
}
-const llvm::ConstantFP *
+const ConstantFP *
llvm::getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI) {
MachineInstr *MI = MRI.getVRegDef(VReg);
if (TargetOpcode::G_FCONSTANT != MI->getOpcode())
@@ -338,15 +368,8 @@ llvm::getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI) {
return MI->getOperand(1).getFPImm();
}
-namespace {
-struct DefinitionAndSourceRegister {
- llvm::MachineInstr *MI;
- Register Reg;
-};
-} // namespace
-
-static llvm::Optional<DefinitionAndSourceRegister>
-getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) {
+Optional<DefinitionAndSourceRegister>
+llvm::getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) {
Register DefSrcReg = Reg;
auto *DefMI = MRI.getVRegDef(Reg);
auto DstTy = MRI.getType(DefMI->getOperand(0).getReg());
@@ -355,7 +378,7 @@ getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) {
while (DefMI->getOpcode() == TargetOpcode::COPY) {
Register SrcReg = DefMI->getOperand(1).getReg();
auto SrcTy = MRI.getType(SrcReg);
- if (!SrcTy.isValid() || SrcTy != DstTy)
+ if (!SrcTy.isValid())
break;
DefMI = MRI.getVRegDef(SrcReg);
DefSrcReg = SrcReg;
@@ -363,8 +386,8 @@ getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) {
return DefinitionAndSourceRegister{DefMI, DefSrcReg};
}
-llvm::MachineInstr *llvm::getDefIgnoringCopies(Register Reg,
- const MachineRegisterInfo &MRI) {
+MachineInstr *llvm::getDefIgnoringCopies(Register Reg,
+ const MachineRegisterInfo &MRI) {
Optional<DefinitionAndSourceRegister> DefSrcReg =
getDefSrcRegIgnoringCopies(Reg, MRI);
return DefSrcReg ? DefSrcReg->MI : nullptr;
@@ -377,8 +400,8 @@ Register llvm::getSrcRegIgnoringCopies(Register Reg,
return DefSrcReg ? DefSrcReg->Reg : Register();
}
-llvm::MachineInstr *llvm::getOpcodeDef(unsigned Opcode, Register Reg,
- const MachineRegisterInfo &MRI) {
+MachineInstr *llvm::getOpcodeDef(unsigned Opcode, Register Reg,
+ const MachineRegisterInfo &MRI) {
MachineInstr *DefMI = getDefIgnoringCopies(Reg, MRI);
return DefMI && DefMI->getOpcode() == Opcode ? DefMI : nullptr;
}
@@ -407,9 +430,8 @@ Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const Register Op1,
if (!MaybeOp1Cst)
return None;
- LLT Ty = MRI.getType(Op1);
- APInt C1(Ty.getSizeInBits(), *MaybeOp1Cst, true);
- APInt C2(Ty.getSizeInBits(), *MaybeOp2Cst, true);
+ const APInt &C1 = *MaybeOp1Cst;
+ const APInt &C2 = *MaybeOp2Cst;
switch (Opcode) {
default:
break;
@@ -458,7 +480,8 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
if (!DefMI)
return false;
- if (DefMI->getFlag(MachineInstr::FmNoNans))
+ const TargetMachine& TM = DefMI->getMF()->getTarget();
+ if (DefMI->getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath)
return true;
if (SNaN) {
@@ -489,75 +512,304 @@ Align llvm::inferAlignFromPtrInfo(MachineFunction &MF,
return Align(1);
}
+Register llvm::getFunctionLiveInPhysReg(MachineFunction &MF,
+ const TargetInstrInfo &TII,
+ MCRegister PhysReg,
+ const TargetRegisterClass &RC,
+ LLT RegTy) {
+ DebugLoc DL; // FIXME: Is no location the right choice?
+ MachineBasicBlock &EntryMBB = MF.front();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ Register LiveIn = MRI.getLiveInVirtReg(PhysReg);
+ if (LiveIn) {
+ MachineInstr *Def = MRI.getVRegDef(LiveIn);
+ if (Def) {
+ // FIXME: Should the verifier check this is in the entry block?
+ assert(Def->getParent() == &EntryMBB && "live-in copy not in entry block");
+ return LiveIn;
+ }
+
+ // It's possible the incoming argument register and copy were added during
+ // lowering, but later deleted because they were or became dead. If this
+ // happens, re-insert the copy.
+ } else {
+ // The live in register was not present, so add it.
+ LiveIn = MF.addLiveIn(PhysReg, &RC);
+ if (RegTy.isValid())
+ MRI.setType(LiveIn, RegTy);
+ }
+
+ BuildMI(EntryMBB, EntryMBB.begin(), DL, TII.get(TargetOpcode::COPY), LiveIn)
+ .addReg(PhysReg);
+ if (!EntryMBB.isLiveIn(PhysReg))
+ EntryMBB.addLiveIn(PhysReg);
+ return LiveIn;
+}
+
Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const Register Op1,
uint64_t Imm,
const MachineRegisterInfo &MRI) {
auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI);
if (MaybeOp1Cst) {
- LLT Ty = MRI.getType(Op1);
- APInt C1(Ty.getSizeInBits(), *MaybeOp1Cst, true);
switch (Opcode) {
default:
break;
- case TargetOpcode::G_SEXT_INREG:
- return C1.trunc(Imm).sext(C1.getBitWidth());
+ case TargetOpcode::G_SEXT_INREG: {
+ LLT Ty = MRI.getType(Op1);
+ return MaybeOp1Cst->trunc(Imm).sext(Ty.getScalarSizeInBits());
+ }
}
}
return None;
}
+bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
+ GISelKnownBits *KB) {
+ Optional<DefinitionAndSourceRegister> DefSrcReg =
+ getDefSrcRegIgnoringCopies(Reg, MRI);
+ if (!DefSrcReg)
+ return false;
+
+ const MachineInstr &MI = *DefSrcReg->MI;
+ const LLT Ty = MRI.getType(Reg);
+
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_CONSTANT: {
+ unsigned BitWidth = Ty.getScalarSizeInBits();
+ const ConstantInt *CI = MI.getOperand(1).getCImm();
+ return CI->getValue().zextOrTrunc(BitWidth).isPowerOf2();
+ }
+ case TargetOpcode::G_SHL: {
+ // A left-shift of a constant one will have exactly one bit set because
+ // shifting the bit off the end is undefined.
+
+ // TODO: Constant splat
+ if (auto ConstLHS = getConstantVRegVal(MI.getOperand(1).getReg(), MRI)) {
+ if (*ConstLHS == 1)
+ return true;
+ }
+
+ break;
+ }
+ case TargetOpcode::G_LSHR: {
+ if (auto ConstLHS = getConstantVRegVal(MI.getOperand(1).getReg(), MRI)) {
+ if (ConstLHS->isSignMask())
+ return true;
+ }
+
+ break;
+ }
+ default:
+ break;
+ }
+
+ // TODO: Are all operands of a build vector constant powers of two?
+ if (!KB)
+ return false;
+
+ // More could be done here, though the above checks are enough
+ // to handle some common cases.
+
+ // Fall back to computeKnownBits to catch other known cases.
+ KnownBits Known = KB->getKnownBits(Reg);
+ return (Known.countMaxPopulation() == 1) && (Known.countMinPopulation() == 1);
+}
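
The two shift facts used above can be checked directly; an editor's sketch on u8 values (in-range shift amounts only, since out-of-range shifts are poison at the MIR level):

#include <cassert>
#include <cstdint>

uint8_t shl_one(unsigned Amt) { return static_cast<uint8_t>(1u << Amt); }
uint8_t lshr_signmask(unsigned Amt) { return static_cast<uint8_t>(0x80u >> Amt); }

int main() {
  for (unsigned Amt = 0; Amt < 8; ++Amt) {
    assert(__builtin_popcount(shl_one(Amt)) == 1);       // exactly one bit
    assert(__builtin_popcount(lshr_signmask(Amt)) == 1); // exactly one bit
  }
}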
+
void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) {
AU.addPreserved<StackProtector>();
}
-LLT llvm::getLCMType(LLT Ty0, LLT Ty1) {
- if (!Ty0.isVector() && !Ty1.isVector()) {
- unsigned Mul = Ty0.getSizeInBits() * Ty1.getSizeInBits();
- int GCDSize = greatestCommonDivisor(Ty0.getSizeInBits(),
- Ty1.getSizeInBits());
- return LLT::scalar(Mul / GCDSize);
- }
+static unsigned getLCMSize(unsigned OrigSize, unsigned TargetSize) {
+ unsigned Mul = OrigSize * TargetSize;
+ unsigned GCDSize = greatestCommonDivisor(OrigSize, TargetSize);
+ return Mul / GCDSize;
+}
+
+LLT llvm::getLCMType(LLT OrigTy, LLT TargetTy) {
+ const unsigned OrigSize = OrigTy.getSizeInBits();
+ const unsigned TargetSize = TargetTy.getSizeInBits();
+
+ if (OrigSize == TargetSize)
+ return OrigTy;
+
+ if (OrigTy.isVector()) {
+ const LLT OrigElt = OrigTy.getElementType();
- if (Ty0.isVector() && !Ty1.isVector()) {
- assert(Ty0.getElementType() == Ty1 && "not yet handled");
- return Ty0;
+ if (TargetTy.isVector()) {
+ const LLT TargetElt = TargetTy.getElementType();
+
+ if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) {
+ int GCDElts = greatestCommonDivisor(OrigTy.getNumElements(),
+ TargetTy.getNumElements());
+ // Prefer the original element type.
+ int Mul = OrigTy.getNumElements() * TargetTy.getNumElements();
+ return LLT::vector(Mul / GCDElts, OrigTy.getElementType());
+ }
+ } else {
+ if (OrigElt.getSizeInBits() == TargetSize)
+ return OrigTy;
+ }
+
+ unsigned LCMSize = getLCMSize(OrigSize, TargetSize);
+ return LLT::vector(LCMSize / OrigElt.getSizeInBits(), OrigElt);
}
- if (Ty1.isVector() && !Ty0.isVector()) {
- assert(Ty1.getElementType() == Ty0 && "not yet handled");
- return Ty1;
+ if (TargetTy.isVector()) {
+ unsigned LCMSize = getLCMSize(OrigSize, TargetSize);
+ return LLT::vector(LCMSize / OrigSize, OrigTy);
}
- if (Ty0.isVector() && Ty1.isVector()) {
- assert(Ty0.getElementType() == Ty1.getElementType() && "not yet handled");
+ unsigned LCMSize = getLCMSize(OrigSize, TargetSize);
+
+ // Preserve pointer types.
+ if (LCMSize == OrigSize)
+ return OrigTy;
+ if (LCMSize == TargetSize)
+ return TargetTy;
- int GCDElts = greatestCommonDivisor(Ty0.getNumElements(),
- Ty1.getNumElements());
+ return LLT::scalar(LCMSize);
+}
- int Mul = Ty0.getNumElements() * Ty1.getNumElements();
- return LLT::vector(Mul / GCDElts, Ty0.getElementType());
+LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) {
+ const unsigned OrigSize = OrigTy.getSizeInBits();
+ const unsigned TargetSize = TargetTy.getSizeInBits();
+
+ if (OrigSize == TargetSize)
+ return OrigTy;
+
+ if (OrigTy.isVector()) {
+ LLT OrigElt = OrigTy.getElementType();
+ if (TargetTy.isVector()) {
+ LLT TargetElt = TargetTy.getElementType();
+ if (OrigElt.getSizeInBits() == TargetElt.getSizeInBits()) {
+ int GCD = greatestCommonDivisor(OrigTy.getNumElements(),
+ TargetTy.getNumElements());
+ return LLT::scalarOrVector(GCD, OrigElt);
+ }
+ } else {
+ // If the source is a vector of pointers, return a pointer element.
+ if (OrigElt.getSizeInBits() == TargetSize)
+ return OrigElt;
+ }
+
+ unsigned GCD = greatestCommonDivisor(OrigSize, TargetSize);
+ if (GCD == OrigElt.getSizeInBits())
+ return OrigElt;
+
+ // If we can't produce the original element type, we have to use a smaller
+ // scalar.
+ if (GCD < OrigElt.getSizeInBits())
+ return LLT::scalar(GCD);
+ return LLT::vector(GCD / OrigElt.getSizeInBits(), OrigElt);
+ }
+
+ if (TargetTy.isVector()) {
+ // Try to preserve the original element type.
+ LLT TargetElt = TargetTy.getElementType();
+ if (TargetElt.getSizeInBits() == OrigSize)
+ return OrigTy;
}
- llvm_unreachable("not yet handled");
+ unsigned GCD = greatestCommonDivisor(OrigSize, TargetSize);
+ return LLT::scalar(GCD);
}
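
The size arithmetic behind the rewritten getLCMType/getGCDType, shown on raw bit widths (editor's sketch; the real helpers additionally decide how to carve the result into vector elements):

#include <cassert>
#include <numeric>

unsigned lcmSize(unsigned A, unsigned B) { return A / std::gcd(A, B) * B; }
unsigned gcdSize(unsigned A, unsigned B) { return std::gcd(A, B); }

int main() {
  // <3 x s32> vs <4 x s32>: 96 and 128 bits -> 384 bits, i.e. <12 x s32>.
  assert(lcmSize(96, 128) == 384);
  // <4 x s32> vs s64: gcd(128, 64) = 64 bits, carved as <2 x s32>.
  assert(gcdSize(128, 64) == 64);
}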
-LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) {
- if (OrigTy.isVector() && TargetTy.isVector()) {
- assert(OrigTy.getElementType() == TargetTy.getElementType());
- int GCD = greatestCommonDivisor(OrigTy.getNumElements(),
- TargetTy.getNumElements());
- return LLT::scalarOrVector(GCD, OrigTy.getElementType());
+Optional<int> llvm::getSplatIndex(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
+ "Only G_SHUFFLE_VECTOR can have a splat index!");
+ ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+ auto FirstDefinedIdx = find_if(Mask, [](int Elt) { return Elt >= 0; });
+
+ // If all elements are undefined, this shuffle can be considered a splat.
+ // Return 0 to give callers the best chance to simplify.
+ if (FirstDefinedIdx == Mask.end())
+ return 0;
+
+ // Make sure all remaining elements are either undef or the same
+ // as the first non-undef value.
+ int SplatValue = *FirstDefinedIdx;
+ if (any_of(make_range(std::next(FirstDefinedIdx), Mask.end()),
+ [&SplatValue](int Elt) { return Elt >= 0 && Elt != SplatValue; }))
+ return None;
+
+ return SplatValue;
+}
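
A standalone sketch of the splat scan in getSplatIndex (editor's illustration on a plain mask vector):

#include <algorithm>
#include <cassert>
#include <optional>
#include <vector>

std::optional<int> splatIndex(const std::vector<int> &Mask) {
  auto First = std::find_if(Mask.begin(), Mask.end(),
                            [](int E) { return E >= 0; });
  if (First == Mask.end())
    return 0; // all-undef counts as a splat; 0 simplifies best
  int Splat = *First;
  for (auto It = std::next(First); It != Mask.end(); ++It)
    if (*It >= 0 && *It != Splat) // a defined element disagrees
      return std::nullopt;
  return Splat;
}

int main() {
  assert(splatIndex({-1, 2, -1, 2}) == 2);
  assert(!splatIndex({0, 1, 0, 1}).has_value());
}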
+
+static bool isBuildVectorOp(unsigned Opcode) {
+ return Opcode == TargetOpcode::G_BUILD_VECTOR ||
+ Opcode == TargetOpcode::G_BUILD_VECTOR_TRUNC;
+}
+
+// TODO: Handle mixed undef elements.
+static bool isBuildVectorConstantSplat(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ int64_t SplatValue) {
+ if (!isBuildVectorOp(MI.getOpcode()))
+ return false;
+
+ const unsigned NumOps = MI.getNumOperands();
+ for (unsigned I = 1; I != NumOps; ++I) {
+ Register Element = MI.getOperand(I).getReg();
+ if (!mi_match(Element, MRI, m_SpecificICst(SplatValue)))
+ return false;
}
- if (OrigTy.isVector() && !TargetTy.isVector()) {
- assert(OrigTy.getElementType() == TargetTy);
- return TargetTy;
+ return true;
+}
+
+Optional<int64_t>
+llvm::getBuildVectorConstantSplat(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ if (!isBuildVectorOp(MI.getOpcode()))
+ return None;
+
+ const unsigned NumOps = MI.getNumOperands();
+ Optional<int64_t> Scalar;
+ for (unsigned I = 1; I != NumOps; ++I) {
+ Register Element = MI.getOperand(I).getReg();
+ int64_t ElementValue;
+ if (!mi_match(Element, MRI, m_ICst(ElementValue)))
+ return None;
+ if (!Scalar)
+ Scalar = ElementValue;
+ else if (*Scalar != ElementValue)
+ return None;
}
- assert(!OrigTy.isVector() && !TargetTy.isVector() &&
- "GCD type of vector and scalar not implemented");
+ return Scalar;
+}
+
+bool llvm::isBuildVectorAllZeros(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ return isBuildVectorConstantSplat(MI, MRI, 0);
+}
- int GCD = greatestCommonDivisor(OrigTy.getSizeInBits(),
- TargetTy.getSizeInBits());
- return LLT::scalar(GCD);
+bool llvm::isBuildVectorAllOnes(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ return isBuildVectorConstantSplat(MI, MRI, -1);
+}
+
+bool llvm::isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
+ bool IsFP) {
+ switch (TLI.getBooleanContents(IsVector, IsFP)) {
+ case TargetLowering::UndefinedBooleanContent:
+ return Val & 0x1;
+ case TargetLowering::ZeroOrOneBooleanContent:
+ return Val == 1;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ return Val == -1;
+ }
+ llvm_unreachable("Invalid boolean contents");
+}
+
+int64_t llvm::getICmpTrueVal(const TargetLowering &TLI, bool IsVector,
+ bool IsFP) {
+ switch (TLI.getBooleanContents(IsVector, IsFP)) {
+ case TargetLowering::UndefinedBooleanContent:
+ case TargetLowering::ZeroOrOneBooleanContent:
+ return 1;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ return -1;
+ }
+ llvm_unreachable("Invalid boolean contents");
}
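
A minimal replica of the boolean-contents mapping above (the real code queries TargetLowering; the enum here is a stand-in for illustration):

#include <cassert>

enum BooleanContent { Undefined, ZeroOrOne, ZeroOrNegativeOne };

bool isConstTrueValSketch(BooleanContent BC, long long Val) {
  switch (BC) {
  case Undefined:         return Val & 0x1; // only bit 0 is meaningful
  case ZeroOrOne:         return Val == 1;
  case ZeroOrNegativeOne: return Val == -1; // all-ones lanes
  }
  return false;
}

int main() {
  assert(isConstTrueValSketch(ZeroOrNegativeOne, -1));
  assert(!isConstTrueValSketch(ZeroOrNegativeOne, 1));
}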
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
index 1e20c02ba160..6c1ce4c1efb0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -223,8 +223,9 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
// FIXME: Find better heuristics
llvm::stable_sort(
Globals, [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
- return DL.getTypeAllocSize(GV1->getValueType()) <
- DL.getTypeAllocSize(GV2->getValueType());
+ // We don't support scalable global variables.
+ return DL.getTypeAllocSize(GV1->getValueType()).getFixedSize() <
+ DL.getTypeAllocSize(GV2->getValueType()).getFixedSize();
});
// If we want to just blindly group all globals together, do so.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp b/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp
index 0ba7e920e507..810b10c9c82a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp
@@ -165,7 +165,7 @@ namespace {
Value *InitLoopCount();
// Insert the set_loop_iteration intrinsic.
- void InsertIterationSetup(Value *LoopCountInit);
+ Value *InsertIterationSetup(Value *LoopCountInit);
// Insert the loop_decrement intrinsic.
void InsertLoopDec();
@@ -187,7 +187,7 @@ namespace {
const DataLayout &DL,
OptimizationRemarkEmitter *ORE) :
SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()),
- ExitCount(Info.ExitCount),
+ TripCount(Info.TripCount),
CountType(Info.CountType),
ExitBranch(Info.ExitBranch),
LoopDecrement(Info.LoopDecrement),
@@ -202,7 +202,7 @@ namespace {
OptimizationRemarkEmitter *ORE = nullptr;
Loop *L = nullptr;
Module *M = nullptr;
- const SCEV *ExitCount = nullptr;
+ const SCEV *TripCount = nullptr;
Type *CountType = nullptr;
BranchInst *ExitBranch = nullptr;
Value *LoopDecrement = nullptr;
@@ -234,7 +234,7 @@ bool HardwareLoops::runOnFunction(Function &F) {
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) {
Loop *L = *I;
- if (!L->getParentLoop())
+ if (L->isOutermost())
TryConvertLoop(L);
}
@@ -298,7 +298,7 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
}
assert(
- (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
+ (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.TripCount) &&
"Hardware Loop must have set exit info.");
BasicBlock *Preheader = L->getLoopPreheader();
@@ -325,11 +325,11 @@ void HardwareLoop::Create() {
return;
}
- InsertIterationSetup(LoopCountInit);
+ Value *Setup = InsertIterationSetup(LoopCountInit);
if (UsePHICounter || ForceHardwareLoopPHI) {
Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);
- Value *EltsRem = InsertPHICounter(LoopCountInit, LoopDec);
+ Value *EltsRem = InsertPHICounter(Setup, LoopDec);
LoopDec->setOperand(0, EltsRem);
UpdateBranch(LoopDec);
} else
@@ -383,18 +383,13 @@ Value *HardwareLoop::InitLoopCount() {
// loop counter and tests that is not zero?
SCEVExpander SCEVE(SE, DL, "loopcnt");
- if (!ExitCount->getType()->isPointerTy() &&
- ExitCount->getType() != CountType)
- ExitCount = SE.getZeroExtendExpr(ExitCount, CountType);
-
- ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType));
// If we're trying to use the 'test and set' form of the intrinsic, we need
// to replace a conditional branch that is controlling entry to the loop. It
// is likely (guaranteed?) that the preheader has an unconditional branch to
// the loop header, so also check if it has a single predecessor.
- if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount,
- SE.getZero(ExitCount->getType()))) {
+ if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, TripCount,
+ SE.getZero(TripCount->getType()))) {
LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
UseLoopGuard |= ForceGuardLoopEntry;
} else
@@ -402,16 +397,23 @@ Value *HardwareLoop::InitLoopCount() {
BasicBlock *BB = L->getLoopPreheader();
if (UseLoopGuard && BB->getSinglePredecessor() &&
- cast<BranchInst>(BB->getTerminator())->isUnconditional())
- BB = BB->getSinglePredecessor();
+ cast<BranchInst>(BB->getTerminator())->isUnconditional()) {
+ BasicBlock *Predecessor = BB->getSinglePredecessor();
+ // If it's not safe to create a while loop, don't force it; create a
+ // do-while loop instead.
+ if (!isSafeToExpandAt(TripCount, Predecessor->getTerminator(), SE))
+ UseLoopGuard = false;
+ else
+ BB = Predecessor;
+ }
- if (!isSafeToExpandAt(ExitCount, BB->getTerminator(), SE)) {
- LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "
- << *ExitCount << "\n");
+ if (!isSafeToExpandAt(TripCount, BB->getTerminator(), SE)) {
+ LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand TripCount "
+ << *TripCount << "\n");
return nullptr;
}
- Value *Count = SCEVE.expandCodeFor(ExitCount, CountType,
+ Value *Count = SCEVE.expandCodeFor(TripCount, CountType,
BB->getTerminator());
// FIXME: We've expanded Count where we hope to insert the counter setting
@@ -430,11 +432,13 @@ Value *HardwareLoop::InitLoopCount() {
return Count;
}
-void HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
+Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
IRBuilder<> Builder(BeginBB->getTerminator());
Type *Ty = LoopCountInit->getType();
- Intrinsic::ID ID = UseLoopGuard ?
- Intrinsic::test_set_loop_iterations : Intrinsic::set_loop_iterations;
+ bool UsePhi = UsePHICounter || ForceHardwareLoopPHI;
+ Intrinsic::ID ID = UseLoopGuard ? Intrinsic::test_set_loop_iterations
+ : (UsePhi ? Intrinsic::start_loop_iterations
+ : Intrinsic::set_loop_iterations);
Function *LoopIter = Intrinsic::getDeclaration(M, ID, Ty);
Value *SetCount = Builder.CreateCall(LoopIter, LoopCountInit);
@@ -450,6 +454,7 @@ void HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
}
LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: "
<< *SetCount << "\n");
+ return UseLoopGuard ? LoopCountInit : SetCount;
}
void HardwareLoop::InsertLoopDec() {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp
index 1a5c5d685017..37be2eabf5fe 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp
@@ -751,7 +751,7 @@ bool IfConverter::CountDuplicatedInstructions(
// A pred-clobbering instruction in the shared portion prevents
// if-conversion.
std::vector<MachineOperand> PredDefs;
- if (TII->DefinesPredicate(*TIB, PredDefs))
+ if (TII->ClobbersPredicate(*TIB, PredDefs, false))
return false;
// If we get all the way to the branch instructions, don't count them.
if (!TIB->isBranch())
@@ -1146,7 +1146,7 @@ void IfConverter::ScanInstructions(BBInfo &BBI,
// FIXME: Make use of PredDefs? e.g. ADDC, SUBC sets predicates but are
// still potentially predicable.
std::vector<MachineOperand> PredDefs;
- if (TII->DefinesPredicate(MI, PredDefs))
+ if (TII->ClobbersPredicate(MI, PredDefs, true))
BBI.ClobbersPred = true;
if (!TII->isPredicable(MI)) {
@@ -2264,8 +2264,7 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
if (ToBBI.IsBrAnalyzable)
ToBBI.BB->normalizeSuccProbs();
- SmallVector<MachineBasicBlock *, 4> FromSuccs(FromMBB.succ_begin(),
- FromMBB.succ_end());
+ SmallVector<MachineBasicBlock *, 4> FromSuccs(FromMBB.successors());
MachineBasicBlock *NBB = getNextBlock(FromMBB);
MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : nullptr;
// The edge probability from ToBBI.BB to FromMBB, which is only needed when
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp
index 16c9bfc672af..5cdaa9b74e80 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ImplicitNullChecks.cpp
@@ -200,10 +200,16 @@ class ImplicitNullChecks : public MachineFunctionPass {
unsigned PointerReg,
ArrayRef<MachineInstr *> PrevInsts);
+ /// Returns true if \p DependenceMI can clobber the live-ins of the NullSucc
+ /// block if it were hoisted to the NullCheck block. This is used by the
+ /// caller, canHoistInst, to decide whether DependenceMI can be hoisted
+ /// safely.
+ bool canDependenceHoistingClobberLiveIns(MachineInstr *DependenceMI,
+ MachineBasicBlock *NullSucc);
+
/// Return true if \p FaultingMI can be hoisted from after the
/// instructions in \p InstsSeenSoFar to before them. Set \p Dependence to a
- /// non-null value if we also need to (and legally can) hoist a depedency.
- bool canHoistInst(MachineInstr *FaultingMI, unsigned PointerReg,
+ /// non-null value if we also need to (and legally can) hoist a dependency.
+ bool canHoistInst(MachineInstr *FaultingMI,
ArrayRef<MachineInstr *> InstsSeenSoFar,
MachineBasicBlock *NullSucc, MachineInstr *&Dependence);
@@ -275,12 +281,12 @@ bool ImplicitNullChecks::canReorder(const MachineInstr *A,
// between A and B here -- for instance, we should not be dealing with heap
// load-store dependencies here.
- for (auto MOA : A->operands()) {
+ for (const auto &MOA : A->operands()) {
if (!(MOA.isReg() && MOA.getReg()))
continue;
Register RegA = MOA.getReg();
- for (auto MOB : B->operands()) {
+ for (const auto &MOB : B->operands()) {
if (!(MOB.isReg() && MOB.getReg()))
continue;
@@ -347,11 +353,9 @@ ImplicitNullChecks::areMemoryOpsAliased(const MachineInstr &MI,
return AR_MayAlias;
continue;
}
- llvm::AliasResult AAResult =
- AA->alias(MemoryLocation(MMO1->getValue(), LocationSize::unknown(),
- MMO1->getAAInfo()),
- MemoryLocation(MMO2->getValue(), LocationSize::unknown(),
- MMO2->getAAInfo()));
+ llvm::AliasResult AAResult = AA->alias(
+ MemoryLocation::getAfter(MMO1->getValue(), MMO1->getAAInfo()),
+ MemoryLocation::getAfter(MMO2->getValue(), MMO2->getAAInfo()));
if (AAResult != NoAlias)
return AR_MayAlias;
}
@@ -363,23 +367,105 @@ ImplicitNullChecks::SuitabilityResult
ImplicitNullChecks::isSuitableMemoryOp(const MachineInstr &MI,
unsigned PointerReg,
ArrayRef<MachineInstr *> PrevInsts) {
- int64_t Offset;
- bool OffsetIsScalable;
- const MachineOperand *BaseOp;
+ // Implementation restriction for faulting_op insertion
+ // TODO: This could be relaxed if we find a test case which warrants it.
+ if (MI.getDesc().getNumDefs() > 1)
+ return SR_Unsuitable;
+ if (!MI.mayLoadOrStore() || MI.isPredicable())
+ return SR_Unsuitable;
+ auto AM = TII->getAddrModeFromMemoryOp(MI, TRI);
+ if (!AM)
+ return SR_Unsuitable;
+ auto AddrMode = *AM;
+ const Register BaseReg = AddrMode.BaseReg, ScaledReg = AddrMode.ScaledReg;
+ int64_t Displacement = AddrMode.Displacement;
- if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI) ||
- !BaseOp->isReg() || BaseOp->getReg() != PointerReg)
+ // We need the base of the memory instruction to be the same as the
+ // register where the null check is performed (i.e. PointerReg).
+ if (BaseReg != PointerReg && ScaledReg != PointerReg)
+ return SR_Unsuitable;
+ const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
+ unsigned PointerRegSizeInBits = TRI->getRegSizeInBits(PointerReg, MRI);
+ // Bail out if the sizes of BaseReg, ScaledReg and PointerReg are not the
+ // same.
+ if ((BaseReg &&
+ TRI->getRegSizeInBits(BaseReg, MRI) != PointerRegSizeInBits) ||
+ (ScaledReg &&
+ TRI->getRegSizeInBits(ScaledReg, MRI) != PointerRegSizeInBits))
return SR_Unsuitable;
- // FIXME: This algorithm assumes instructions have fixed-size offsets.
- if (OffsetIsScalable)
+ // Returns true if RegUsedInAddr holds a constant value whose effect can be
+ // folded into the displacement; on success, also updates Displacement.
+ auto CalculateDisplacementFromAddrMode = [&](Register RegUsedInAddr,
+ int64_t Multiplier) {
+ // The register can be NoRegister, which is defined as zero for all targets.
+ // Consider an instruction of interest such as `movq 8(,%rdi,8), %rax`.
+ // Here the ScaledReg is %rdi, while there is no BaseReg.
+ if (!RegUsedInAddr)
+ return false;
+ assert(Multiplier && "expected to be non-zero!");
+ MachineInstr *ModifyingMI = nullptr;
+ for (auto It = std::next(MachineBasicBlock::const_reverse_iterator(&MI));
+ It != MI.getParent()->rend(); It++) {
+ const MachineInstr *CurrMI = &*It;
+ if (CurrMI->modifiesRegister(RegUsedInAddr, TRI)) {
+ ModifyingMI = const_cast<MachineInstr *>(CurrMI);
+ break;
+ }
+ }
+ if (!ModifyingMI)
+ return false;
+ // Check for the constant value defined in the register by ModifyingMI.
+ // This means all other previous values for that register have been
+ // invalidated.
+ int64_t ImmVal;
+ if (!TII->getConstValDefinedInReg(*ModifyingMI, RegUsedInAddr, ImmVal))
+ return false;
+ // Calculate the reg size in bits, since this is needed for bailing out in
+ // case of overflow.
+ int32_t RegSizeInBits = TRI->getRegSizeInBits(RegUsedInAddr, MRI);
+ APInt ImmValC(RegSizeInBits, ImmVal, true /*IsSigned*/);
+ APInt MultiplierC(RegSizeInBits, Multiplier);
+ assert(MultiplierC.isStrictlyPositive() &&
+ "expected to be a positive value!");
+ bool IsOverflow;
+ // Sign of the product depends on the sign of the ImmVal, since Multiplier
+ // is always positive.
+ APInt Product = ImmValC.smul_ov(MultiplierC, IsOverflow);
+ if (IsOverflow)
+ return false;
+ APInt DisplacementC(64, Displacement, true /*isSigned*/);
+ DisplacementC = Product.sadd_ov(DisplacementC, IsOverflow);
+ if (IsOverflow)
+ return false;
+
+ // We only handle displacements up to 64 bits wide.
+ if (DisplacementC.getActiveBits() > 64)
+ return false;
+ Displacement = DisplacementC.getSExtValue();
+ return true;
+ };
+
+ // If a register used in the address is constant, fold its effect into the
+ // displacement for ease of analysis.
+ bool BaseRegIsConstVal = false, ScaledRegIsConstVal = false;
+ if (CalculateDisplacementFromAddrMode(BaseReg, 1))
+ BaseRegIsConstVal = true;
+ if (CalculateDisplacementFromAddrMode(ScaledReg, AddrMode.Scale))
+ ScaledRegIsConstVal = true;
+
+ // Any address register other than PointerReg must have been folded into the
+ // Displacement above; otherwise we do not know whether the Displacement is
+ // made up of symbolic values.
+ // This matters because we do not want to incorrectly assume that the load
+ // falls in the zeroth faulting page in the "sane offset check" below.
+ if ((BaseReg && BaseReg != PointerReg && !BaseRegIsConstVal) ||
+ (ScaledReg && ScaledReg != PointerReg && !ScaledRegIsConstVal))
return SR_Unsuitable;
// We want the mem access to be issued at a sane offset from PointerReg,
// so that if PointerReg is null then the access reliably page faults.
- if (!(MI.mayLoadOrStore() && !MI.isPredicable() &&
- -PageSize < Offset && Offset < PageSize))
+ if (!(-PageSize < Displacement && Displacement < PageSize))
return SR_Unsuitable;
// Finally, check whether the current memory access aliases with a previous one.
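
Stepping back from the hunk above: the constant folding performed by CalculateDisplacementFromAddrMode reduces to an overflow-checked multiply-accumulate. An editor's sketch for an address like `movq 8(,%rdi,8), %rax` where %rdi was last set to a known constant (using GCC/Clang builtins in place of the APInt checks):

#include <cassert>
#include <cstdint>

bool foldConstIndex(int64_t Disp, int64_t ImmVal, int64_t Scale,
                    int64_t &Out) {
  int64_t Prod;
  if (__builtin_mul_overflow(ImmVal, Scale, &Prod)) // ImmVal * Multiplier
    return false;
  return !__builtin_add_overflow(Disp, Prod, &Out); // + Displacement
}

int main() {
  int64_t Out;
  assert(foldConstIndex(8, 3, 8, Out) && Out == 32); // well inside a page
}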
@@ -393,8 +479,39 @@ ImplicitNullChecks::isSuitableMemoryOp(const MachineInstr &MI,
return SR_Suitable;
}
+bool ImplicitNullChecks::canDependenceHoistingClobberLiveIns(
+ MachineInstr *DependenceMI, MachineBasicBlock *NullSucc) {
+ for (const auto &DependenceMO : DependenceMI->operands()) {
+ if (!(DependenceMO.isReg() && DependenceMO.getReg()))
+ continue;
+
+ // Make sure that we won't clobber any live ins to the sibling block by
+ // hoisting Dependency. For instance, we can't hoist INST to before the
+ // null check (even if it safe, and does not violate any dependencies in
+ // the non_null_block) if %rdx is live in to _null_block.
+ //
+ // test %rcx, %rcx
+ // je _null_block
+ // _non_null_block:
+ // %rdx = INST
+ // ...
+ //
+ // This restriction does not apply to the faulting load inst because, in
+ // case the pointer loaded from is in the null page, the load will not
+ // semantically execute and will not affect machine state. That is, if the
+ // load was loading into %rax and it faults, the value of %rax should stay
+ // the same as it would have been had the load not executed, and we'd have
+ // branched to NullSucc directly.
+ if (AnyAliasLiveIn(TRI, NullSucc, DependenceMO.getReg()))
+ return true;
+
+ }
+
+ // The dependence does not clobber live-ins in the NullSucc block.
+ return false;
+}
+
bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI,
- unsigned PointerReg,
ArrayRef<MachineInstr *> InstsSeenSoFar,
MachineBasicBlock *NullSucc,
MachineInstr *&Dependence) {
@@ -419,37 +536,8 @@ bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI,
if (DependenceMI->mayLoadOrStore())
return false;
- for (auto &DependenceMO : DependenceMI->operands()) {
- if (!(DependenceMO.isReg() && DependenceMO.getReg()))
- continue;
-
- // Make sure that we won't clobber any live ins to the sibling block by
- // hoisting Dependency. For instance, we can't hoist INST to before the
- // null check (even if it safe, and does not violate any dependencies in
- // the non_null_block) if %rdx is live in to _null_block.
- //
- // test %rcx, %rcx
- // je _null_block
- // _non_null_block:
- // %rdx = INST
- // ...
- //
- // This restriction does not apply to the faulting load inst because in
- // case the pointer loaded from is in the null page, the load will not
- // semantically execute, and affect machine state. That is, if the load
- // was loading into %rax and it faults, the value of %rax should stay the
- // same as it would have been had the load not have executed and we'd have
- // branched to NullSucc directly.
- if (AnyAliasLiveIn(TRI, NullSucc, DependenceMO.getReg()))
- return false;
-
- // The Dependency can't be re-defining the base register -- then we won't
- // get the memory operation on the address we want. This is already
- // checked in \c IsSuitableMemoryOp.
- assert(!(DependenceMO.isDef() &&
- TRI->regsOverlap(DependenceMO.getReg(), PointerReg)) &&
- "Should have been checked before!");
- }
+ if (canDependenceHoistingClobberLiveIns(DependenceMI, NullSucc))
+ return false;
auto DepDepResult =
computeDependence(DependenceMI, {InstsSeenSoFar.begin(), DependenceItr});
@@ -486,9 +574,9 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
MBP.Predicate == MachineBranchPredicate::PRED_EQ)))
return false;
- // If we cannot erase the test instruction itself, then making the null check
- // implicit does not buy us much.
- if (!MBP.SingleUseCondition)
+ // If there is a separate instruction that generates the condition, we
+ // choose not to transform unless we can remove both the condition and the
+ // consuming branch.
+ if (MBP.ConditionDef && !MBP.SingleUseCondition)
return false;
MachineBasicBlock *NotNullSucc, *NullSucc;
@@ -506,32 +594,34 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
if (NotNullSucc->pred_size() != 1)
return false;
- // To prevent the invalid transformation of the following code:
- //
- // mov %rax, %rcx
- // test %rax, %rax
- // %rax = ...
- // je throw_npe
- // mov(%rcx), %r9
- // mov(%rax), %r10
- //
- // into:
- //
- // mov %rax, %rcx
- // %rax = ....
- // faulting_load_op("movl (%rax), %r10", throw_npe)
- // mov(%rcx), %r9
- //
- // we must ensure that there are no instructions between the 'test' and
- // conditional jump that modify %rax.
const Register PointerReg = MBP.LHS.getReg();
- assert(MBP.ConditionDef->getParent() == &MBB && "Should be in basic block");
-
- for (auto I = MBB.rbegin(); MBP.ConditionDef != &*I; ++I)
- if (I->modifiesRegister(PointerReg, TRI))
- return false;
+ if (MBP.ConditionDef) {
+ // To prevent the invalid transformation of the following code:
+ //
+ // mov %rax, %rcx
+ // test %rax, %rax
+ // %rax = ...
+ // je throw_npe
+ // mov(%rcx), %r9
+ // mov(%rax), %r10
+ //
+ // into:
+ //
+ // mov %rax, %rcx
+ // %rax = ....
+ // faulting_load_op("movl (%rax), %r10", throw_npe)
+ // mov(%rcx), %r9
+ //
+ // we must ensure that there are no instructions between the 'test' and
+ // conditional jump that modify %rax.
+ assert(MBP.ConditionDef->getParent() == &MBB &&
+ "Should be in basic block");
+ for (auto I = MBB.rbegin(); MBP.ConditionDef != &*I; ++I)
+ if (I->modifiesRegister(PointerReg, TRI))
+ return false;
+ }
// Starting with a code fragment like:
//
// test %rax, %rax
@@ -597,17 +687,15 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
if (SR == SR_Impossible)
return false;
if (SR == SR_Suitable &&
- canHoistInst(&MI, PointerReg, InstsSeenSoFar, NullSucc, Dependence)) {
+ canHoistInst(&MI, InstsSeenSoFar, NullSucc, Dependence)) {
NullCheckList.emplace_back(&MI, MBP.ConditionDef, &MBB, NotNullSucc,
NullSucc, Dependence);
return true;
}
- // If MI re-defines the PointerReg then we cannot move further.
- if (llvm::any_of(MI.operands(), [&](MachineOperand &MO) {
- return MO.isReg() && MO.getReg() && MO.isDef() &&
- TRI->regsOverlap(MO.getReg(), PointerReg);
- }))
+ // If MI re-defines the PointerReg in a way that changes the value of
+ // PointerReg if it was null, then we cannot move further.
+ if (!TII->preservesZeroValueInReg(&MI, PointerReg, TRI))
return false;
InstsSeenSoFar.push_back(&MI);
}
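The switch from "any redefinition of PointerReg stops the scan" to the preservesZeroValueInReg query can be illustrated with a toy predicate. This is a hedged sketch of the idea only; the actual rules live in each target's TargetInstrInfo hook and differ per target.

#include <cstdint>

// Toy opcodes standing in for target instructions.
enum class Op { Copy, AndImm, ShlImm, AddImm };

// True when an input register value of zero is guaranteed to remain zero,
// so the scan for a suitable memory access may continue past the
// instruction even though it redefines the pointer register.
bool preservesZero(Op O, int64_t Imm) {
  switch (O) {
  case Op::Copy:   return true;     // 0 -> 0
  case Op::AndImm: return true;     // 0 & Imm == 0
  case Op::ShlImm: return true;     // 0 << Imm == 0
  case Op::AddImm: return Imm == 0; // 0 + Imm is nonzero unless Imm == 0
  }
  return false;
}

int main() { return preservesZero(Op::AddImm, 0) ? 0 : 1; }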
@@ -712,9 +800,11 @@ void ImplicitNullChecks::rewriteNullChecks(
}
NC.getMemOperation()->eraseFromParent();
- NC.getCheckOperation()->eraseFromParent();
+ if (auto *CheckOp = NC.getCheckOperation())
+ CheckOp->eraseFromParent();
- // Insert an *unconditional* branch to not-null successor.
+ // Insert an *unconditional* branch to the not-null successor; we expect
+ // block placement to remove fallthroughs later.
TII->insertBranch(*NC.getCheckBlock(), NC.getNotNullSucc(), nullptr,
/*Cond=*/None, DL);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
index 41eef2fed840..876e1d3f932a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -153,7 +153,7 @@ public:
unsigned Original);
bool rmFromMergeableSpills(MachineInstr &Spill, int StackSlot);
void hoistAllSpills();
- void LRE_DidCloneVirtReg(unsigned, unsigned) override;
+ void LRE_DidCloneVirtReg(Register, Register) override;
};
class InlineSpiller : public Spiller {
@@ -269,6 +269,14 @@ static Register isFullCopyOf(const MachineInstr &MI, Register Reg) {
return Register();
}
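+/// Ensure that a LiveInterval exists in \p LIS for every virtual register
+/// defined by \p MI; the call is made purely for this side effect.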
+static void getVDefInterval(const MachineInstr &MI, LiveIntervals &LIS) {
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
+ if (MO.isReg() && MO.isDef() && Register::isVirtualRegister(MO.getReg()))
+ LIS.getInterval(MO.getReg());
+ }
+}
+
/// isSnippet - Identify if a live interval is a snippet that should be spilled.
/// It is assumed that SnipLI is a virtual register with the same original as
/// Edit->getReg().
@@ -289,8 +297,9 @@ bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) {
// Check that all uses satisfy our criteria.
for (MachineRegisterInfo::reg_instr_nodbg_iterator
- RI = MRI.reg_instr_nodbg_begin(SnipLI.reg),
- E = MRI.reg_instr_nodbg_end(); RI != E; ) {
+ RI = MRI.reg_instr_nodbg_begin(SnipLI.reg()),
+ E = MRI.reg_instr_nodbg_end();
+ RI != E;) {
MachineInstr &MI = *RI++;
// Allow copies to/from Reg.
@@ -299,11 +308,11 @@ bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) {
// Allow stack slot loads.
int FI;
- if (SnipLI.reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot)
+ if (SnipLI.reg() == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot)
continue;
// Allow stack slot stores.
- if (SnipLI.reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot)
+ if (SnipLI.reg() == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot)
continue;
// Allow a single additional instruction.
@@ -409,14 +418,21 @@ bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI,
MII = DefMI;
++MII;
}
+ MachineInstrSpan MIS(MII, MBB);
// Insert spill without kill flag immediately after def.
TII.storeRegToStackSlot(*MBB, MII, SrcReg, false, StackSlot,
MRI.getRegClass(SrcReg), &TRI);
+ LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MII);
+ for (const MachineInstr &MI : make_range(MIS.begin(), MII))
+ getVDefInterval(MI, LIS);
--MII; // Point to store instruction.
- LIS.InsertMachineInstrInMaps(*MII);
LLVM_DEBUG(dbgs() << "\thoisted: " << SrcVNI->def << '\t' << *MII);
- HSpiller.addToMergeableSpills(*MII, StackSlot, Original);
+ // If only one store instruction is required for the spill, add it to the
+ // mergeable list. In X86 AMX, two instructions are required to store, and
+ // we disable merging for that case.
+ if (MIS.begin() == MII)
+ HSpiller.addToMergeableSpills(*MII, StackSlot, Original);
++NumSpills;
return true;
}
@@ -432,7 +448,7 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
do {
LiveInterval *LI;
std::tie(LI, VNI) = WorkList.pop_back_val();
- Register Reg = LI->reg;
+ Register Reg = LI->reg();
LLVM_DEBUG(dbgs() << "Checking redundant spills for " << VNI->id << '@'
<< VNI->def << " in " << *LI << '\n');
@@ -511,7 +527,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
if (!SnippetCopies.count(MI))
continue;
LiveInterval &SnipLI = LIS.getInterval(MI->getOperand(1).getReg());
- assert(isRegToSpill(SnipLI.reg) && "Unexpected register in copy");
+ assert(isRegToSpill(SnipLI.reg()) && "Unexpected register in copy");
VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getRegSlot(true));
assert(SnipVNI && "Snippet undefined before copy");
WorkList.push_back(std::make_pair(&SnipLI, SnipVNI));
@@ -556,7 +572,7 @@ bool InlineSpiller::canGuaranteeAssignmentAfterRemat(Register VReg,
bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
// Analyze instruction
SmallVector<std::pair<MachineInstr *, unsigned>, 8> Ops;
- VirtRegInfo RI = AnalyzeVirtRegInBundle(MI, VirtReg.reg, &Ops);
+ VirtRegInfo RI = AnalyzeVirtRegInBundle(MI, VirtReg.reg(), &Ops);
if (!RI.Reads)
return false;
@@ -568,7 +584,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
LLVM_DEBUG(dbgs() << "\tadding <undef> flags: ");
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
- if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg)
+ if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg())
MO.setIsUndef();
}
LLVM_DEBUG(dbgs() << UseIdx << '\t' << MI);
@@ -608,7 +624,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
// If we can't guarantee that we'll be able to actually assign the new vreg,
// we can't remat.
- if (!canGuaranteeAssignmentAfterRemat(VirtReg.reg, MI)) {
+ if (!canGuaranteeAssignmentAfterRemat(VirtReg.reg(), MI)) {
markValueUsed(&VirtReg, ParentVNI);
LLVM_DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << MI);
return false;
@@ -633,7 +649,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
// Replace operands
for (const auto &OpPair : Ops) {
MachineOperand &MO = OpPair.first->getOperand(OpPair.second);
- if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) {
+ if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg()) {
MO.setReg(NewVReg);
MO.setIsKill();
}
@@ -810,6 +826,14 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
bool WasCopy = MI->isCopy();
Register ImpReg;
+ // TII::foldMemoryOperand will do what we need here for statepoint
+ // (fold the load into the use and remove the corresponding def). We will
+ // replace uses of the removed def with loads (spillAroundUses).
+ // For that to work we need to untie the def and use, to pass them through
+ // foldMemoryOperand and signal foldPatchpoint that it is allowed to
+ // fold them.
+ bool UntieRegs = MI->getOpcode() == TargetOpcode::STATEPOINT;
+
// Spill subregs if the target allows it.
// We always want to spill subregs for stackmap/patchpoint pseudos.
bool SpillSubRegs = TII.isSubregFoldable() ||
@@ -835,7 +859,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
if (LoadMI && MO.isDef())
return false;
// Tied use operands should not be passed to foldMemoryOperand.
- if (!MI->isRegTiedToDefOperand(Idx))
+ if (UntieRegs || !MI->isRegTiedToDefOperand(Idx))
FoldOps.push_back(Idx);
}
@@ -846,11 +870,31 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
MachineInstrSpan MIS(MI, MI->getParent());
+ SmallVector<std::pair<unsigned, unsigned>> TiedOps;
+ if (UntieRegs)
+ for (unsigned Idx : FoldOps) {
+ MachineOperand &MO = MI->getOperand(Idx);
+ if (!MO.isTied())
+ continue;
+ unsigned Tied = MI->findTiedOperandIdx(Idx);
+ if (MO.isUse())
+ TiedOps.emplace_back(Tied, Idx);
+ else {
+ assert(MO.isDef() && "Tied operand is neither use nor def?");
+ TiedOps.emplace_back(Idx, Tied);
+ }
+ MI->untieRegOperand(Idx);
+ }
+
MachineInstr *FoldMI =
LoadMI ? TII.foldMemoryOperand(*MI, FoldOps, *LoadMI, &LIS)
: TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS, &VRM);
- if (!FoldMI)
+ if (!FoldMI) {
+ // Re-tie operands.
+ for (auto Tied : TiedOps)
+ MI->tieOperands(Tied.first, Tied.second);
return false;
+ }
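The save/untie/re-tie discipline above can be shown with a toy model; ToyInstr and tryFold are invented names standing in for MachineInstr and the fold attempt, so this is a sketch of the bookkeeping pattern, not the LLVM API.

#include <algorithm>
#include <cassert>
#include <utility>
#include <vector>

// Toy stand-in for a MachineInstr's tied-operand constraints; untie()/tie()
// model MachineInstr::untieRegOperand and MachineInstr::tieOperands.
struct ToyInstr {
  std::vector<std::pair<unsigned, unsigned>> Ties; // (DefIdx, UseIdx) pairs

  void untie(std::pair<unsigned, unsigned> T) {
    Ties.erase(std::remove(Ties.begin(), Ties.end(), T), Ties.end());
  }
  void tie(std::pair<unsigned, unsigned> T) { Ties.push_back(T); }
};

// Save the pairs, untie, attempt the fold, and re-tie on failure so the
// instruction is left exactly as it was found.
bool tryFold(ToyInstr &MI, bool FoldSucceeds) {
  const std::vector<std::pair<unsigned, unsigned>> TiedOps = MI.Ties;
  for (const auto &T : TiedOps)
    MI.untie(T);
  if (!FoldSucceeds) {
    for (const auto &T : TiedOps) // restore the constraints
      MI.tie(T);
    return false;
  }
  return true; // on success the folded instruction replaces MI anyway
}

int main() {
  ToyInstr MI{{{0, 1}}};
  tryFold(MI, /*FoldSucceeds=*/false);
  assert(MI.Ties.size() == 1 && "ties must be restored on failure");
}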
// Remove LIS for any dead defs in the original MI not in FoldMI.
for (MIBundleOperands MO(*MI); MO.isValid(); ++MO) {
@@ -869,7 +913,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
// FoldMI does not define this physreg. Remove the LI segment.
assert(MO->isDead() && "Cannot fold physreg def");
SlotIndex Idx = LIS.getInstructionIndex(*MI).getRegSlot();
- LIS.removePhysRegDefAt(Reg, Idx);
+ LIS.removePhysRegDefAt(Reg.asMCReg(), Idx);
}
int FI;
@@ -906,7 +950,11 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
++NumFolded;
else if (Ops.front().second == 0) {
++NumSpills;
- HSpiller.addToMergeableSpills(*FoldMI, StackSlot, Original);
+ // If only one store instruction is required for the spill, add it to the
+ // mergeable list. In X86 AMX, two instructions are required to store, and
+ // we disable merging for that case.
+ if (std::distance(MIS.begin(), MIS.end()) <= 1)
+ HSpiller.addToMergeableSpills(*FoldMI, StackSlot, Original);
} else
++NumReloads;
return true;
@@ -953,6 +1001,7 @@ void InlineSpiller::insertSpill(Register NewVReg, bool isKill,
MachineInstrSpan MIS(MI, &MBB);
MachineBasicBlock::iterator SpillBefore = std::next(MI);
bool IsRealSpill = isRealSpill(*MI);
+
if (IsRealSpill)
TII.storeRegToStackSlot(MBB, SpillBefore, NewVReg, isKill, StackSlot,
MRI.getRegClass(NewVReg), &TRI);
@@ -966,11 +1015,16 @@ void InlineSpiller::insertSpill(Register NewVReg, bool isKill,
MachineBasicBlock::iterator Spill = std::next(MI);
LIS.InsertMachineInstrRangeInMaps(Spill, MIS.end());
+ for (const MachineInstr &MI : make_range(Spill, MIS.end()))
+ getVDefInterval(MI, LIS);
LLVM_DEBUG(
dumpMachineInstrRangeWithSlotIndex(Spill, MIS.end(), LIS, "spill"));
++NumSpills;
- if (IsRealSpill)
+ // If only one store instruction is required for the spill, add it to the
+ // mergeable list. In X86 AMX, two instructions are required to store, and
+ // we disable merging for that case.
+ if (IsRealSpill && std::distance(Spill, MIS.end()) <= 1)
HSpiller.addToMergeableSpills(*Spill, StackSlot, Original);
}
@@ -1160,7 +1214,7 @@ void HoistSpillHelper::addToMergeableSpills(MachineInstr &Spill, int StackSlot,
// save a copy of LiveInterval in StackSlotToOrigLI because the original
// LiveInterval may be cleared after all its references are spilled.
if (StackSlotToOrigLI.find(StackSlot) == StackSlotToOrigLI.end()) {
- auto LI = std::make_unique<LiveInterval>(OrigLI.reg, OrigLI.weight);
+ auto LI = std::make_unique<LiveInterval>(OrigLI.reg(), OrigLI.weight());
LI->assign(OrigLI, Allocator);
StackSlotToOrigLI[StackSlot] = std::move(LI);
}
@@ -1188,7 +1242,7 @@ bool HoistSpillHelper::rmFromMergeableSpills(MachineInstr &Spill,
bool HoistSpillHelper::isSpillCandBB(LiveInterval &OrigLI, VNInfo &OrigVNI,
MachineBasicBlock &BB, Register &LiveReg) {
SlotIndex Idx;
- Register OrigReg = OrigLI.reg;
+ Register OrigReg = OrigLI.reg();
MachineBasicBlock::iterator MI = IPA.getLastInsertPointIter(OrigLI, BB);
if (MI != BB.end())
Idx = LIS.getInstructionIndex(*MI);
@@ -1516,10 +1570,13 @@ void HoistSpillHelper::hoistAllSpills() {
for (auto const &Insert : SpillsToIns) {
MachineBasicBlock *BB = Insert.first;
Register LiveReg = Insert.second;
- MachineBasicBlock::iterator MI = IPA.getLastInsertPointIter(OrigLI, *BB);
- TII.storeRegToStackSlot(*BB, MI, LiveReg, false, Slot,
+ MachineBasicBlock::iterator MII = IPA.getLastInsertPointIter(OrigLI, *BB);
+ MachineInstrSpan MIS(MII, BB);
+ TII.storeRegToStackSlot(*BB, MII, LiveReg, false, Slot,
MRI.getRegClass(LiveReg), &TRI);
- LIS.InsertMachineInstrRangeInMaps(std::prev(MI), MI);
+ LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MII);
+ for (const MachineInstr &MI : make_range(MIS.begin(), MII))
+ getVDefInterval(MI, LIS);
++NumSpills;
}
@@ -1539,11 +1596,13 @@ void HoistSpillHelper::hoistAllSpills() {
/// For VirtReg clone, the \p New register should have the same physreg or
/// stackslot as the \p old register.
-void HoistSpillHelper::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
+void HoistSpillHelper::LRE_DidCloneVirtReg(Register New, Register Old) {
if (VRM.hasPhys(Old))
VRM.assignVirt2Phys(New, VRM.getPhys(Old));
else if (VRM.getStackSlot(Old) != VirtRegMap::NO_STACK_SLOT)
VRM.assignVirt2StackSlot(New, VRM.getStackSlot(Old));
else
llvm_unreachable("VReg should be assigned either physreg or stackslot");
+ if (VRM.hasShape(Old))
+ VRM.assignVirt2Shape(New, VRM.getShape(Old));
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.cpp
index 7b50dac4cd1a..a56485cdbc67 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.cpp
@@ -12,19 +12,15 @@
#include "InterferenceCache.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveIntervalUnion.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <cstdint>
-#include <cstdlib>
#include <tuple>
using namespace llvm;
@@ -64,8 +60,8 @@ void InterferenceCache::init(MachineFunction *mf,
Entries[i].clear(mf, indexes, lis);
}
-InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) {
- unsigned E = PhysRegEntries[PhysReg];
+InterferenceCache::Entry *InterferenceCache::get(MCRegister PhysReg) {
+ unsigned char E = PhysRegEntries[PhysReg.id()];
if (E < CacheEntries && Entries[E].getPhysReg() == PhysReg) {
if (!Entries[E].valid(LIUArray, TRI))
Entries[E].revalidate(LIUArray, TRI);
@@ -101,7 +97,7 @@ void InterferenceCache::Entry::revalidate(LiveIntervalUnion *LIUArray,
RegUnits[i].VirtTag = LIUArray[*Units].getTag();
}
-void InterferenceCache::Entry::reset(unsigned physReg,
+void InterferenceCache::Entry::reset(MCRegister physReg,
LiveIntervalUnion *LIUArray,
const TargetRegisterInfo *TRI,
const MachineFunction *MF) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.h b/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.h
index 9019e9f61fa0..ace1691c1363 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.h
@@ -44,7 +44,7 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache {
/// of PhysReg in all basic blocks.
class Entry {
/// PhysReg - The register currently represented.
- unsigned PhysReg = 0;
+ MCRegister PhysReg = 0;
/// Tag - Cache tag is changed when any of the underlying LiveIntervalUnions
/// change.
@@ -102,13 +102,13 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache {
void clear(MachineFunction *mf, SlotIndexes *indexes, LiveIntervals *lis) {
assert(!hasRefs() && "Cannot clear cache entry with references");
- PhysReg = 0;
+ PhysReg = MCRegister::NoRegister;
MF = mf;
Indexes = indexes;
LIS = lis;
}
- unsigned getPhysReg() const { return PhysReg; }
+ MCRegister getPhysReg() const { return PhysReg; }
void addRef(int Delta) { RefCount += Delta; }
@@ -120,10 +120,8 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache {
bool valid(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI);
/// reset - Initialize entry to represent physReg's aliases.
- void reset(unsigned physReg,
- LiveIntervalUnion *LIUArray,
- const TargetRegisterInfo *TRI,
- const MachineFunction *MF);
+ void reset(MCRegister physReg, LiveIntervalUnion *LIUArray,
+ const TargetRegisterInfo *TRI, const MachineFunction *MF);
/// get - Return an up to date BlockInterference.
BlockInterference *get(unsigned MBBNum) {
@@ -154,7 +152,7 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache {
Entry Entries[CacheEntries];
// get - Get a valid entry for PhysReg.
- Entry *get(unsigned PhysReg);
+ Entry *get(MCRegister PhysReg);
public:
InterferenceCache() = default;
@@ -207,11 +205,11 @@ public:
~Cursor() { setEntry(nullptr); }
/// setPhysReg - Point this cursor to PhysReg's interference.
- void setPhysReg(InterferenceCache &Cache, unsigned PhysReg) {
+ void setPhysReg(InterferenceCache &Cache, MCRegister PhysReg) {
// Release reference before getting a new one. That guarantees we can
// actually have CacheEntries live cursors.
setEntry(nullptr);
- if (PhysReg)
+ if (PhysReg.isValid())
setEntry(Cache.get(PhysReg));
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index c4d83547a06c..b22e6faeb91c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -22,8 +22,8 @@
//
// E.g. An interleaved load (Factor = 2):
// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
-// %v0 = shuffle <8 x i32> %wide.vec, <8 x i32> undef, <0, 2, 4, 6>
-// %v1 = shuffle <8 x i32> %wide.vec, <8 x i32> undef, <1, 3, 5, 7>
+// %v0 = shuffle <8 x i32> %wide.vec, <8 x i32> poison, <0, 2, 4, 6>
+// %v1 = shuffle <8 x i32> %wide.vec, <8 x i32> poison, <1, 3, 5, 7>
//
// It could be transformed into a ld2 intrinsic in AArch64 backend or a vld2
// intrinsic in ARM backend.
@@ -66,6 +66,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <utility>
@@ -118,6 +119,15 @@ private:
/// replacements are also performed.
bool tryReplaceExtracts(ArrayRef<ExtractElementInst *> Extracts,
ArrayRef<ShuffleVectorInst *> Shuffles);
+
+ /// Given a number of shuffles of the form shuffle(binop(x,y)), convert them
+ /// to binop(shuffle(x), shuffle(y)) to allow the formation of an
+ /// interleaving load. Any newly created shuffles that operate on \p LI will
+ /// be added to \p Shuffles. Returns true, if any changes to the IR have been
+ /// made.
+ bool replaceBinOpShuffles(ArrayRef<ShuffleVectorInst *> BinOpShuffles,
+ SmallVectorImpl<ShuffleVectorInst *> &Shuffles,
+ LoadInst *LI);
};
} // end anonymous namespace.
@@ -283,67 +293,97 @@ bool InterleavedAccess::lowerInterleavedLoad(
if (!LI->isSimple() || isa<ScalableVectorType>(LI->getType()))
return false;
+ // Check if all users of this load are shufflevectors. If we encounter any
+ // users that are extractelement instructions or binary operators, we save
+ // them to later check if they can be modified to extract from one of the
+ // shufflevectors instead of the load.
+
SmallVector<ShuffleVectorInst *, 4> Shuffles;
SmallVector<ExtractElementInst *, 4> Extracts;
+ // BinOpShuffles need to be handled a single time in case both operands of the
+ // binop are the same load.
+ SmallSetVector<ShuffleVectorInst *, 4> BinOpShuffles;
- // Check if all users of this load are shufflevectors. If we encounter any
- // users that are extractelement instructions, we save them to later check if
- // they can be modifed to extract from one of the shufflevectors instead of
- // the load.
- for (auto UI = LI->user_begin(), E = LI->user_end(); UI != E; UI++) {
- auto *Extract = dyn_cast<ExtractElementInst>(*UI);
+ for (auto *User : LI->users()) {
+ auto *Extract = dyn_cast<ExtractElementInst>(User);
if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) {
Extracts.push_back(Extract);
continue;
}
- ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(*UI);
+ auto *BI = dyn_cast<BinaryOperator>(User);
+ if (BI && BI->hasOneUse()) {
+ if (auto *SVI = dyn_cast<ShuffleVectorInst>(*BI->user_begin())) {
+ BinOpShuffles.insert(SVI);
+ continue;
+ }
+ }
+ auto *SVI = dyn_cast<ShuffleVectorInst>(User);
if (!SVI || !isa<UndefValue>(SVI->getOperand(1)))
return false;
Shuffles.push_back(SVI);
}
- if (Shuffles.empty())
+ if (Shuffles.empty() && BinOpShuffles.empty())
return false;
unsigned Factor, Index;
unsigned NumLoadElements =
cast<FixedVectorType>(LI->getType())->getNumElements();
+ auto *FirstSVI = Shuffles.size() > 0 ? Shuffles[0] : BinOpShuffles[0];
// Check if the first shufflevector is DE-interleave shuffle.
- if (!isDeInterleaveMask(Shuffles[0]->getShuffleMask(), Factor, Index,
- MaxFactor, NumLoadElements))
+ if (!isDeInterleaveMask(FirstSVI->getShuffleMask(), Factor, Index, MaxFactor,
+ NumLoadElements))
return false;
// Holds the corresponding index for each DE-interleave shuffle.
SmallVector<unsigned, 4> Indices;
- Indices.push_back(Index);
- Type *VecTy = Shuffles[0]->getType();
+ Type *VecTy = FirstSVI->getType();
// Check if other shufflevectors are also DE-interleaved of the same type
// and factor as the first shufflevector.
- for (unsigned i = 1; i < Shuffles.size(); i++) {
- if (Shuffles[i]->getType() != VecTy)
+ for (auto *Shuffle : Shuffles) {
+ if (Shuffle->getType() != VecTy)
return false;
-
- if (!isDeInterleaveMaskOfFactor(Shuffles[i]->getShuffleMask(), Factor,
+ if (!isDeInterleaveMaskOfFactor(Shuffle->getShuffleMask(), Factor,
Index))
return false;
+ assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
Indices.push_back(Index);
}
+ for (auto *Shuffle : BinOpShuffles) {
+ if (Shuffle->getType() != VecTy)
+ return false;
+ if (!isDeInterleaveMaskOfFactor(Shuffle->getShuffleMask(), Factor,
+ Index))
+ return false;
+
+ assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
+
+ if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(0) == LI)
+ Indices.push_back(Index);
+ if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(1) == LI)
+ Indices.push_back(Index);
+ }
// Try and modify users of the load that are extractelement instructions to
// use the shufflevector instructions instead of the load.
if (!tryReplaceExtracts(Extracts, Shuffles))
return false;
+ bool BinOpShuffleChanged =
+ replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LI);
+
LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n");
// Try to create target specific intrinsics to replace the load and shuffles.
- if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor))
- return false;
+ if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor)) {
+ // If Extracts is not empty, tryReplaceExtracts made changes earlier.
+ return !Extracts.empty() || BinOpShuffleChanged;
+ }
for (auto SVI : Shuffles)
DeadInsts.push_back(SVI);
@@ -352,6 +392,39 @@ bool InterleavedAccess::lowerInterleavedLoad(
return true;
}
+bool InterleavedAccess::replaceBinOpShuffles(
+ ArrayRef<ShuffleVectorInst *> BinOpShuffles,
+ SmallVectorImpl<ShuffleVectorInst *> &Shuffles, LoadInst *LI) {
+ for (auto *SVI : BinOpShuffles) {
+ BinaryOperator *BI = cast<BinaryOperator>(SVI->getOperand(0));
+ Type *BIOp0Ty = BI->getOperand(0)->getType();
+ ArrayRef<int> Mask = SVI->getShuffleMask();
+ assert(all_of(Mask, [&](int Idx) {
+ return Idx < (int)cast<FixedVectorType>(BIOp0Ty)->getNumElements();
+ }));
+
+ auto *NewSVI1 =
+ new ShuffleVectorInst(BI->getOperand(0), PoisonValue::get(BIOp0Ty),
+ Mask, SVI->getName(), SVI);
+ auto *NewSVI2 = new ShuffleVectorInst(
+ BI->getOperand(1), PoisonValue::get(BI->getOperand(1)->getType()), Mask,
+ SVI->getName(), SVI);
+ Value *NewBI = BinaryOperator::Create(BI->getOpcode(), NewSVI1, NewSVI2,
+ BI->getName(), SVI);
+ SVI->replaceAllUsesWith(NewBI);
+ LLVM_DEBUG(dbgs() << " Replaced: " << *BI << "\n And : " << *SVI
+ << "\n With : " << *NewSVI1 << "\n And : "
+ << *NewSVI2 << "\n And : " << *NewBI << "\n");
+ RecursivelyDeleteTriviallyDeadInstructions(SVI);
+ if (NewSVI1->getOperand(0) == LI)
+ Shuffles.push_back(NewSVI1);
+ if (NewSVI2->getOperand(0) == LI)
+ Shuffles.push_back(NewSVI2);
+ }
+
+ return !BinOpShuffles.empty();
+}
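The rewrite is legal because a shuffle only selects lanes while a binary operator acts lane-wise; a small standalone check (plain C++ over arrays, not IR) demonstrates the identity the pass relies on, shuffle(binop(X, Y), M) == binop(shuffle(X, M), shuffle(Y, M)).

#include <array>
#include <cassert>
#include <cstddef>

// Select lanes of V according to Mask, like a one-input shufflevector.
template <std::size_t N, std::size_t M>
std::array<int, M> shuffle(const std::array<int, N> &V,
                           const std::array<std::size_t, M> &Mask) {
  std::array<int, M> R{};
  for (std::size_t I = 0; I < M; ++I)
    R[I] = V[Mask[I]];
  return R;
}

int main() {
  const std::array<int, 8> X{1, 2, 3, 4, 5, 6, 7, 8};
  const std::array<int, 8> Y{8, 7, 6, 5, 4, 3, 2, 1};
  const std::array<std::size_t, 4> Even{0, 2, 4, 6}; // factor-2 mask, index 0

  std::array<int, 8> Sum{};
  for (std::size_t I = 0; I < 8; ++I)
    Sum[I] = X[I] + Y[I];

  const auto Before = shuffle(Sum, Even); // shuffle(binop(X, Y))
  const auto XS = shuffle(X, Even), YS = shuffle(Y, Even);
  std::array<int, 4> After{};
  for (std::size_t I = 0; I < 4; ++I)
    After[I] = XS[I] + YS[I];             // binop(shuffle(X), shuffle(Y))

  assert(Before == After);
}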
+
bool InterleavedAccess::tryReplaceExtracts(
ArrayRef<ExtractElementInst *> Extracts,
ArrayRef<ShuffleVectorInst *> Shuffles) {
@@ -421,7 +494,7 @@ bool InterleavedAccess::lowerInterleavedStore(
if (!SI->isSimple())
return false;
- ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(SI->getValueOperand());
+ auto *SVI = dyn_cast<ShuffleVectorInst>(SI->getValueOperand());
if (!SVI || !SVI->hasOneUse() || isa<ScalableVectorType>(SVI->getType()))
return false;
@@ -461,10 +534,10 @@ bool InterleavedAccess::runOnFunction(Function &F) {
bool Changed = false;
for (auto &I : instructions(F)) {
- if (LoadInst *LI = dyn_cast<LoadInst>(&I))
+ if (auto *LI = dyn_cast<LoadInst>(&I))
Changed |= lowerInterleavedLoad(LI, DeadInsts);
- if (StoreInst *SI = dyn_cast<StoreInst>(&I))
+ if (auto *SI = dyn_cast<StoreInst>(&I))
Changed |= lowerInterleavedStore(SI, DeadInsts);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
index f7131926ee65..ff3f93d51ea8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -1104,10 +1104,8 @@ InterleavedLoadCombineImpl::findFirstLoad(const std::set<LoadInst *> &LIs) {
// All LIs are within the same BB. Select the first for a reference.
BasicBlock *BB = (*LIs.begin())->getParent();
- BasicBlock::iterator FLI =
- std::find_if(BB->begin(), BB->end(), [&LIs](Instruction &I) -> bool {
- return is_contained(LIs, &I);
- });
+ BasicBlock::iterator FLI = llvm::find_if(
+ *BB, [&LIs](Instruction &I) -> bool { return is_contained(LIs, &I); });
assert(FLI != BB->end());
return cast<LoadInst>(FLI);
@@ -1130,8 +1128,8 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
std::set<Instruction *> Is;
std::set<Instruction *> SVIs;
- unsigned InterleavedCost;
- unsigned InstructionCost = 0;
+ InstructionCost InterleavedCost;
+ InstructionCost InstructionCost = 0;
// Get the interleave factor
unsigned Factor = InterleavedLoad.size();
@@ -1174,6 +1172,10 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
}
}
+ // We need to have a valid cost in order to proceed.
+ if (!InstructionCost.isValid())
+ return false;
+
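A minimal sketch of the sticky-invalid behaviour this check relies on, with std::optional standing in for llvm::InstructionCost: once any addend is unknown the whole sum is unknown, and the transformation must bail out rather than compare a garbage cost.

#include <optional>

using Cost = std::optional<unsigned>;

Cost add(Cost A, Cost B) {
  if (!A || !B)
    return std::nullopt; // invalid is sticky
  return *A + *B;
}

int main() { return add(Cost{2u}, std::nullopt).has_value() ? 1 : 0; }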
// We know that all LoadInst are within the same BB. This guarantees that
// either everything or nothing is loaded.
LoadInst *First = findFirstLoad(LIs);
@@ -1236,8 +1238,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
Mask.push_back(i + j * Factor);
Builder.SetInsertPoint(VI.SVI);
- auto SVI = Builder.CreateShuffleVector(LI, UndefValue::get(LI->getType()),
- Mask, "interleaved.shuffle");
+ auto SVI = Builder.CreateShuffleVector(LI, Mask, "interleaved.shuffle");
VI.SVI->replaceAllUsesWith(SVI);
i++;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp
index e37c21e76597..55089d3b90d0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -329,6 +329,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
break;
case Intrinsic::assume:
+ case Intrinsic::experimental_noalias_scope_decl:
case Intrinsic::var_annotation:
break; // Strip out these intrinsics
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index b485f2cf7261..f9b7bf613ff6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -40,13 +40,16 @@ static cl::opt<bool> EnableTrapUnreachable("trap-unreachable",
void LLVMTargetMachine::initAsmInfo() {
MRI.reset(TheTarget.createMCRegInfo(getTargetTriple().str()));
+ assert(MRI && "Unable to create reg info");
MII.reset(TheTarget.createMCInstrInfo());
+ assert(MII && "Unable to create instruction info");
// FIXME: Having an MCSubtargetInfo on the target machine is a hack due
// to some backends having subtarget feature dependent module level
// code generation. This is similar to the hack in the AsmPrinter for
// module level assembly etc.
STI.reset(TheTarget.createMCSubtargetInfo(
getTargetTriple().str(), getTargetCPU(), getTargetFeatureString()));
+ assert(STI && "Unable to create subtarget info");
MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo(
*MRI, getTargetTriple().str(), Options.MCOptions);
@@ -58,6 +61,9 @@ void LLVMTargetMachine::initAsmInfo() {
"Make sure you include the correct TargetSelect.h"
"and that InitializeAllTargetMCs() is being invoked!");
+ if (Options.BinutilsVersion.first > 0)
+ TmpAsmInfo->setBinutilsVersion(Options.BinutilsVersion);
+
if (Options.DisableIntegratedAS)
TmpAsmInfo->setUseIntegratedAssembler(false);
@@ -118,6 +124,24 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
raw_pwrite_stream *DwoOut,
CodeGenFileType FileType,
MCContext &Context) {
+ Expected<std::unique_ptr<MCStreamer>> MCStreamerOrErr =
+ createMCStreamer(Out, DwoOut, FileType, Context);
+ if (auto Err = MCStreamerOrErr.takeError())
+ return true;
+
+ // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
+ FunctionPass *Printer =
+ getTarget().createAsmPrinter(*this, std::move(*MCStreamerOrErr));
+ if (!Printer)
+ return true;
+
+ PM.add(Printer);
+ return false;
+}
+
+Expected<std::unique_ptr<MCStreamer>> LLVMTargetMachine::createMCStreamer(
+ raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut, CodeGenFileType FileType,
+ MCContext &Context) {
if (Options.MCOptions.MCSaveTempLabels)
Context.setAllowTemporaryLabels(false);
@@ -152,10 +176,14 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, Context);
+ if (!MCE)
+ return make_error<StringError>("createMCCodeEmitter failed",
+ inconvertibleErrorCode());
MCAsmBackend *MAB =
getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions);
- if (!MCE || !MAB)
- return true;
+ if (!MAB)
+ return make_error<StringError>("createMCAsmBackend failed",
+ inconvertibleErrorCode());
Triple T(getTargetTriple().str());
AsmStreamer.reset(getTarget().createMCObjectStreamer(
@@ -174,14 +202,7 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
break;
}
- // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
- FunctionPass *Printer =
- getTarget().createAsmPrinter(*this, std::move(AsmStreamer));
- if (!Printer)
- return true;
-
- PM.add(Printer);
- return false;
+ return std::move(AsmStreamer);
}
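A sketch of the consumer side of this Expected-based factory; makeStreamerLike is a hypothetical stand-in for createMCStreamer, and the legacy bool-for-failure signature of addAsmPrinter is why the detailed error gets flattened. (Compiles against LLVMSupport; the Error APIs used here are the real ones.)

#include "llvm/Support/Error.h"

llvm::Expected<int> makeStreamerLike(bool Fail) {
  if (Fail)
    return llvm::make_error<llvm::StringError>(
        "createMCCodeEmitter failed", llvm::inconvertibleErrorCode());
  return 42;
}

bool useIt() {
  llvm::Expected<int> OrErr = makeStreamerLike(/*Fail=*/true);
  if (!OrErr) {
    llvm::consumeError(OrErr.takeError()); // legacy bool API drops the detail
    return true;                           // failure
  }
  return false;
}

int main() { return useIt() ? 1 : 0; }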
bool LLVMTargetMachine::addPassesToEmitFile(
@@ -196,20 +217,14 @@ bool LLVMTargetMachine::addPassesToEmitFile(
if (!PassConfig)
return true;
- if (!TargetPassConfig::willCompleteCodeGenPipeline()) {
- if (this->getTargetTriple().isOSAIX()) {
- // On AIX, we might manifest MCSymbols during SDAG lowering. For MIR
- // testing to be meaningful, we need to ensure that the symbols created
- // are MCSymbolXCOFF variants, which requires that
- // the TargetLoweringObjectFile instance has been initialized.
- MCContext &Ctx = MMIWP->getMMI().getContext();
- const_cast<TargetLoweringObjectFile &>(*this->getObjFileLowering())
- .Initialize(Ctx, *this);
- }
- PM.add(createPrintMIRPass(Out));
- } else if (addAsmPrinter(PM, Out, DwoOut, FileType,
- MMIWP->getMMI().getContext()))
- return true;
+ if (TargetPassConfig::willCompleteCodeGenPipeline()) {
+ if (addAsmPrinter(PM, Out, DwoOut, FileType, MMIWP->getMMI().getContext()))
+ return true;
+ } else {
+ // MIR printing is redundant with -filetype=null.
+ if (FileType != CGFT_Null)
+ PM.add(createPrintMIRPass(Out));
+ }
PM.add(createFreeMachineFunctionPass());
return false;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LexicalScopes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LexicalScopes.cpp
index 690b429832a5..8139c2cbb6cd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LexicalScopes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LexicalScopes.cpp
@@ -324,7 +324,7 @@ bool LexicalScopes::dominates(const DILocation *DL, MachineBasicBlock *MBB) {
Set = std::make_unique<BlockSetT>();
getMachineBasicBlocks(DL, *Set);
}
- return Set->count(MBB) != 0;
+ return Set->contains(MBB);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
new file mode 100644
index 000000000000..18ffe8ba0669
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -0,0 +1,3363 @@
+//===- InstrRefBasedImpl.cpp - Tracking Debug Value MIs -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file InstrRefBasedImpl.cpp
+///
+/// This is a separate implementation of LiveDebugValues, see
+/// LiveDebugValues.cpp and VarLocBasedImpl.cpp for more information.
+///
+/// This pass propagates variable locations between basic blocks, resolving
+/// control flow conflicts between them. The problem is much like SSA
+/// construction, where each DBG_VALUE instruction assigns the *value* that
+/// a variable has, and every instruction where the variable is in scope uses
+/// that variable. The resulting map of instruction-to-value is then translated
+/// into a register (or spill) location for each variable over each instruction.
+///
+/// This pass determines which DBG_VALUE dominates which instructions, or if
+/// none do, where values must be merged (like PHI nodes). The added
+/// complication is that because codegen has already finished, a PHI node may
+/// be needed for a variable location to be correct, but no register or spill
+/// slot merges the necessary values. In these circumstances, the variable
+/// location is dropped.
+///
+/// What makes this analysis non-trivial is loops: we cannot tell in advance
+/// whether a variable location is live throughout a loop, or whether its
+/// location is clobbered (or redefined by another DBG_VALUE), without
+/// exploring all the way through.
+///
+/// To make this simpler we perform two kinds of analysis. First, we identify
+/// every value defined by every instruction (ignoring those that only move
+/// another value), then compute a map of which values are available for each
+/// instruction. This is stronger than a reaching-def analysis, as we create
+/// PHI values where other values merge.
+///
+/// Secondly, for each variable, we effectively re-construct SSA using each
+/// DBG_VALUE as a def. The DBG_VALUEs read a value-number computed by the
+/// first analysis from the location they refer to. We can then compute the
+/// dominance frontiers of where a variable has a value, and create PHI nodes
+/// where they merge.
+/// This isn't precisely SSA-construction though, because the function shape
+/// is pre-defined. If a variable location requires a PHI node, but no
+/// PHI for the relevant values is present in the function (as computed by the
+/// first analysis), the location must be dropped.
+///
+/// Once both are complete, we can pass back over all instructions knowing:
+/// * What _value_ each variable should contain, either defined by an
+/// instruction or where control flow merges
+/// * What the location of that value is (if any).
+/// Allowing us to create appropriate live-in DBG_VALUEs, and DBG_VALUEs when
+/// a value moves location. After this pass runs, all variable locations within
+/// a block should be specified by DBG_VALUEs within that block, allowing
+/// DbgEntityHistoryCalculator to focus on individual blocks.
+///
+/// This pass is able to go fast because the size of the first
+/// reaching-definition analysis is proportional to the working-set size of
+/// the function, which the compiler tries to keep small. (It's also
+/// proportional to the number of blocks). Additionally, we repeatedly perform
+/// the second reaching-definition analysis with only the variables and blocks
+/// in a single lexical scope, exploiting their locality.
+///
+/// Determining where PHIs happen is trickier with this approach, and it comes
+/// to a head in the major problem for LiveDebugValues: is a value live-through
+/// a loop, or not? Your garden-variety dataflow analysis aims to build a set of
+/// facts about a function, however this analysis needs to generate new value
+/// numbers at joins.
+///
+/// To do this, consider a lattice of all definition values, from instructions
+/// and from PHIs. Each PHI is characterised by the RPO number of the block it
+/// occurs in. Each value pair A, B can be ordered by RPO(A) < RPO(B):
+/// with non-PHI values at the top, and any PHI value in the last block (by RPO
+/// order) at the bottom.
+///
+/// (Awkwardly: lower down the _lattice_ means a greater RPO _number_. Below,
+/// "rank" always refers to the former.)
+///
+/// At any join, for each register, we consider:
+/// * All incoming values, and
+/// * The PREVIOUS live-in value at this join.
+/// If all incoming values agree: that's the live-in value. If they do not, the
+/// incoming values are ranked according to the partial order, and the NEXT
+/// LOWEST rank after the PREVIOUS live-in value is picked (multiple values of
+/// the same rank are ignored as conflicting). If there are no candidate values,
+/// or if the rank of the live-in would be lower than the rank of the current
+/// block's PHIs, create a new PHI value.
+///
+/// Intuitively: if it's not immediately obvious what value a join should result
+/// in, we iteratively descend from instruction-definitions down through PHI
+/// values, getting closer to the current block each time. If the current block
+/// is a loop head, this ordering is effectively searching outer levels of
+/// loops, to find a value that's live-through the current loop.
+///
+/// If there is no value that's live-through this loop, a PHI is created for
+/// this location instead. We can't use a lower-ranked PHI because by definition
+/// it doesn't dominate the current block. We can't create a PHI value any
+/// earlier, because we risk creating a PHI value at a location where values do
+/// not in fact merge, thus misrepresenting the truth, and not making the true
+/// live-through value for variable locations.
+///
+/// This algorithm applies to both calculating the availability of values in
+/// the first analysis, and the location of variables in the second. However
+/// for the second we add an extra dimension of pain: creating a variable
+/// location PHI is only valid if, for each incoming edge,
+/// * There is a value for the variable on the incoming edge, and
+/// * All the edges have that value in the same register.
+/// Or put another way: we can only create a variable-location PHI if there is
+/// a matching machine-location PHI, each input to which is the variables value
+/// in the predecessor block.
+///
+/// To accommodate this difference, each point on the lattice is split in
+/// two: a "proposed" PHI and "definite" PHI. Any PHI that can immediately
+/// have a location determined are "definite" PHIs, and no further work is
+/// needed. Otherwise, a location that all non-backedge predecessors agree
+/// on is picked and propagated as a "proposed" PHI value. If that PHI value
+/// is truly live-through, it'll appear on the loop backedges on the next
+/// dataflow iteration, after which the block live-in moves to be a "definite"
+/// PHI. If it's not truly live-through, the variable value will be downgraded
+/// further as we explore the lattice, or remains "proposed" and is considered
+/// invalid once dataflow completes.
+///
+/// ### Terminology
+///
+/// A machine location is a register or spill slot, a value is something that's
+/// defined by an instruction or PHI node, while a variable value is the value
+/// assigned to a variable. A variable location is a machine location, that must
+/// contain the appropriate variable value. A value that is a PHI node is
+/// occasionally called an mphi.
+///
+/// The first dataflow problem is the "machine value location" problem,
+/// because we're determining which machine locations contain which values.
+/// The "locations" are constant: what's unknown is what value they contain.
+///
+/// The second dataflow problem (the one for variables) is the "variable value
+/// problem", because it's determining what values a variable has, rather than
+/// what location those values are placed in. Unfortunately, it's not that
+/// simple, because producing a PHI value always involves picking a location.
+/// This is an imperfection that we just have to accept, at least for now.
+///
+/// TODO:
+/// Overlapping fragments
+/// Entry values
+/// Add back DEBUG statements for debugging this
+/// Collect statistics
+///
+//===----------------------------------------------------------------------===//
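A greatly simplified model of the join rule described above, under the assumption that a value can be summarised by a single integer rank (0 for plain instruction defs, otherwise the RPO number of the block defining the PHI, where a larger number means lower down the lattice). The handling of several distinct same-rank candidates, which the prose says are discarded as conflicting, is omitted.

#include <algorithm>
#include <cassert>
#include <functional>
#include <optional>
#include <vector>

// Returns the rank of the new live-in, or nullopt meaning "create a fresh
// PHI in this block".
std::optional<int> join(const std::vector<int> &IncomingRanks,
                        int PreviousLiveInRank, int CurrentBlockRPO) {
  assert(!IncomingRanks.empty() && "a join has at least one incoming value");

  // If every incoming value agrees, that value is the live-in.
  if (std::adjacent_find(IncomingRanks.begin(), IncomingRanks.end(),
                         std::not_equal_to<int>()) == IncomingRanks.end())
    return IncomingRanks.front();

  // Otherwise descend: take the next-lowest rank strictly after the
  // previous live-in value's rank.
  std::optional<int> Next;
  for (int R : IncomingRanks)
    if (R > PreviousLiveInRank && (!Next || R < *Next))
      Next = R;

  // No candidate, or we would sink past this block's own PHIs: make a PHI.
  if (!Next || *Next >= CurrentBlockRPO)
    return std::nullopt;
  return Next;
}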
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/UniqueVector.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TypeSize.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <functional>
+#include <queue>
+#include <tuple>
+#include <utility>
+#include <vector>
+#include <limits.h>
+#include <limits>
+
+#include "LiveDebugValues.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "livedebugvalues"
+
+STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted");
+STATISTIC(NumRemoved, "Number of DBG_VALUE instructions removed");
+
+// Act more like the VarLoc implementation, by propagating some locations too
+// far and ignoring some transfers.
+static cl::opt<bool> EmulateOldLDV("emulate-old-livedebugvalues", cl::Hidden,
+ cl::desc("Act like old LiveDebugValues did"),
+ cl::init(false));
+
+// Rely on isStoreToStackSlotPostFE and similar to observe all stack spills.
+static cl::opt<bool>
+ ObserveAllStackops("observe-all-stack-ops", cl::Hidden,
+ cl::desc("Allow non-kill spill and restores"),
+ cl::init(false));
+
+namespace {
+
+// The location at which a spilled value resides. It consists of a register and
+// an offset.
+struct SpillLoc {
+ unsigned SpillBase;
+ StackOffset SpillOffset;
+ bool operator==(const SpillLoc &Other) const {
+ return std::make_pair(SpillBase, SpillOffset) ==
+ std::make_pair(Other.SpillBase, Other.SpillOffset);
+ }
+ bool operator<(const SpillLoc &Other) const {
+ return std::make_tuple(SpillBase, SpillOffset.getFixed(),
+ SpillOffset.getScalable()) <
+ std::make_tuple(Other.SpillBase, Other.SpillOffset.getFixed(),
+ Other.SpillOffset.getScalable());
+ }
+};
+
+class LocIdx {
+ unsigned Location;
+
+ // Default constructor is private, initializing to an illegal location number.
+ // Use only for "not an entry" elements in IndexedMaps.
+ LocIdx() : Location(UINT_MAX) { }
+
+public:
+ #define NUM_LOC_BITS 24
+ LocIdx(unsigned L) : Location(L) {
+ assert(L < (1 << NUM_LOC_BITS) && "Machine locations must fit in 24 bits");
+ }
+
+ static LocIdx MakeIllegalLoc() {
+ return LocIdx();
+ }
+
+ bool isIllegal() const {
+ return Location == UINT_MAX;
+ }
+
+ uint64_t asU64() const {
+ return Location;
+ }
+
+ bool operator==(unsigned L) const {
+ return Location == L;
+ }
+
+ bool operator==(const LocIdx &L) const {
+ return Location == L.Location;
+ }
+
+ bool operator!=(unsigned L) const {
+ return !(*this == L);
+ }
+
+ bool operator!=(const LocIdx &L) const {
+ return !(*this == L);
+ }
+
+ bool operator<(const LocIdx &Other) const {
+ return Location < Other.Location;
+ }
+};
+
+class LocIdxToIndexFunctor {
+public:
+ using argument_type = LocIdx;
+ unsigned operator()(const LocIdx &L) const {
+ return L.asU64();
+ }
+};
+
+/// Unique identifier for a value defined by an instruction, as a value type.
+/// Casts back and forth to a uint64_t. Probably replaceable with something less
+/// bit-constrained. Each value identifies the instruction and machine location
+/// where the value is defined, although there may be no corresponding machine
+/// operand for it (ex: regmasks clobbering values). The instructions are
+/// one-based, and definitions that are PHIs have instruction number zero.
+///
+/// The obvious limits of a 1M block function or 1M instruction blocks are
+/// problematic; but by that point we should probably have bailed out of
+/// trying to analyse the function.
+class ValueIDNum {
+ uint64_t BlockNo : 20; /// The block where the def happens.
+ uint64_t InstNo : 20; /// The Instruction where the def happens.
+ /// One based, is distance from start of block.
+ uint64_t LocNo : NUM_LOC_BITS; /// The machine location where the def happens.
+
+public:
+ // XXX -- temporarily enabled while the live-in / live-out tables are moved
+ // to something more type-y
+ ValueIDNum() : BlockNo(0xFFFFF),
+ InstNo(0xFFFFF),
+ LocNo(0xFFFFFF) { }
+
+ ValueIDNum(uint64_t Block, uint64_t Inst, uint64_t Loc)
+ : BlockNo(Block), InstNo(Inst), LocNo(Loc) { }
+
+ ValueIDNum(uint64_t Block, uint64_t Inst, LocIdx Loc)
+ : BlockNo(Block), InstNo(Inst), LocNo(Loc.asU64()) { }
+
+ uint64_t getBlock() const { return BlockNo; }
+ uint64_t getInst() const { return InstNo; }
+ uint64_t getLoc() const { return LocNo; }
+ bool isPHI() const { return InstNo == 0; }
+
+ uint64_t asU64() const {
+ uint64_t TmpBlock = BlockNo;
+ uint64_t TmpInst = InstNo;
+ return TmpBlock << 44ull | TmpInst << NUM_LOC_BITS | LocNo;
+ }
+
+ static ValueIDNum fromU64(uint64_t v) {
+ uint64_t L = (v & 0xFFFFFF); // mask must cover all NUM_LOC_BITS (24) bits
+ return {v >> 44ull, ((v >> NUM_LOC_BITS) & 0xFFFFF), L};
+ }
+
+ bool operator<(const ValueIDNum &Other) const {
+ return asU64() < Other.asU64();
+ }
+
+ bool operator==(const ValueIDNum &Other) const {
+ return std::tie(BlockNo, InstNo, LocNo) ==
+ std::tie(Other.BlockNo, Other.InstNo, Other.LocNo);
+ }
+
+ bool operator!=(const ValueIDNum &Other) const { return !(*this == Other); }
+
+ std::string asString(const std::string &mlocname) const {
+ return Twine("Value{bb: ")
+ .concat(Twine(BlockNo).concat(
+ Twine(", inst: ")
+ .concat((InstNo ? Twine(InstNo) : Twine("live-in"))
+ .concat(Twine(", loc: ").concat(Twine(mlocname)))
+ .concat(Twine("}")))))
+ .str();
+ }
+
+ static ValueIDNum EmptyValue;
+};
+
+} // end anonymous namespace
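A standalone round-trip model of the bit packing above (PackedValue is an invented name mirroring ValueIDNum's 20/20/24 layout): block and instruction numbers sit in the top 40 bits and the machine location in the low 24, so the unpack masks must be 0xFFFFF and 0xFFFFFF respectively.

#include <cassert>
#include <cstdint>

struct PackedValue {
  uint64_t Block, Inst, Loc;

  uint64_t asU64() const { return Block << 44 | Inst << 24 | Loc; }

  static PackedValue fromU64(uint64_t V) {
    return {V >> 44, (V >> 24) & 0xFFFFF, V & 0xFFFFFF};
  }
};

int main() {
  const PackedValue V{/*Block=*/3, /*Inst=*/17, /*Loc=*/0x123456};
  const PackedValue W = PackedValue::fromU64(V.asU64());
  assert(W.Block == V.Block && W.Inst == V.Inst && W.Loc == V.Loc);
}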
+
+namespace {
+
+/// Meta qualifiers for a value. Pair of whatever expression is used to qualify
+/// the value, and a Boolean of whether or not it's indirect.
+class DbgValueProperties {
+public:
+ DbgValueProperties(const DIExpression *DIExpr, bool Indirect)
+ : DIExpr(DIExpr), Indirect(Indirect) {}
+
+ /// Extract properties from an existing DBG_VALUE instruction.
+ DbgValueProperties(const MachineInstr &MI) {
+ assert(MI.isDebugValue());
+ DIExpr = MI.getDebugExpression();
+ Indirect = MI.getOperand(1).isImm();
+ }
+
+ bool operator==(const DbgValueProperties &Other) const {
+ return std::tie(DIExpr, Indirect) == std::tie(Other.DIExpr, Other.Indirect);
+ }
+
+ bool operator!=(const DbgValueProperties &Other) const {
+ return !(*this == Other);
+ }
+
+ const DIExpression *DIExpr;
+ bool Indirect;
+};
+
+/// Tracker for what values are in machine locations. Listens to the Things
+/// being Done by various instructions, and maintains a table of what machine
+/// locations have what values (as defined by a ValueIDNum).
+///
+/// There are potentially a much larger number of machine locations on the
+/// target machine than the actual working-set size of the function. On x86 for
+/// example, we're extremely unlikely to want to track values through control
+/// or debug registers. To avoid doing so, MLocTracker has several layers of
+/// indirection going on, with two kinds of ``location'':
+/// * A LocID uniquely identifies a register or spill location, with a
+/// predictable value.
+/// * A LocIdx is a key (in the database sense) for a LocID and a ValueIDNum.
+/// Whenever a location is def'd or used by a MachineInstr, we automagically
+/// create a new LocIdx for a location, but not otherwise. This ensures we only
+/// account for locations that are actually used or defined. The cost is another
+/// vector lookup (of LocID -> LocIdx) over any other implementation. This is
+/// fairly cheap, and the compiler tries to reduce the working-set at any one
+/// time in the function anyway.
+///
+/// Register mask operands completely blow this out of the water; I've just
+/// piled hacks on top of hacks to get around that.
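A toy model of the two-level indirection just described (invented names; compare lookupOrTrackRegister below): a dense, fixed-size table maps every possible location ID to a small index, and only locations that are actually touched get a slot in the packed value table.

#include <cstdint>
#include <limits>
#include <vector>

constexpr unsigned IllegalIdx = std::numeric_limits<unsigned>::max();

struct ToyTracker {
  std::vector<unsigned> LocIDToLocIdx; // one entry per register
  std::vector<uint64_t> Values;        // tightly packed, one per tracked loc

  explicit ToyTracker(unsigned NumRegs) : LocIDToLocIdx(NumRegs, IllegalIdx) {}

  // Allocate a packed slot on first touch; return a stable small index.
  unsigned lookupOrTrack(unsigned LocID) {
    unsigned &Idx = LocIDToLocIdx[LocID];
    if (Idx == IllegalIdx) {
      Idx = static_cast<unsigned>(Values.size());
      Values.push_back(0);
    }
    return Idx;
  }
};

int main() {
  ToyTracker T(/*NumRegs=*/256);
  return T.lookupOrTrack(5) == T.lookupOrTrack(5) ? 0 : 1; // index is stable
}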
+class MLocTracker {
+public:
+ MachineFunction &MF;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+ const TargetLowering &TLI;
+
+ /// IndexedMap type, mapping from LocIdx to ValueIDNum.
+ using LocToValueType = IndexedMap<ValueIDNum, LocIdxToIndexFunctor>;
+
+ /// Map of LocIdxes to the ValueIDNums that they store. This is tightly
+ /// packed, entries only exist for locations that are being tracked.
+ LocToValueType LocIdxToIDNum;
+
+ /// "Map" of machine location IDs (i.e., raw register or spill number) to the
+ /// LocIdx key / number for that location. There are always at least as many
+ /// as the number of registers on the target -- if the value in the register
+ /// is not being tracked, then the LocIdx entry is illegal. New entries are
+ /// appended if a new spill slot begins being tracked.
+ /// This and the corresponding reverse map persist for the analysis of the
+ /// whole function, and are necessary for decoding various vectors of
+ /// values.
+ std::vector<LocIdx> LocIDToLocIdx;
+
+ /// Inverse map of LocIDToLocIdx.
+ IndexedMap<unsigned, LocIdxToIndexFunctor> LocIdxToLocID;
+
+ /// Unique-ification of spill slots. Used to number them -- their LocID
+ /// number is the index in SpillLocs minus one plus NumRegs.
+ UniqueVector<SpillLoc> SpillLocs;
+
+ // If we discover a new machine location, assign it an mphi with this
+ // block number.
+ unsigned CurBB;
+
+ /// Cached local copy of the number of registers the target has.
+ unsigned NumRegs;
+
+ /// Collection of register mask operands that have been observed. Second part
+ /// of pair indicates the instruction that they happened in. Used to
+ /// reconstruct where defs happened if we start tracking a location later
+ /// on.
+ SmallVector<std::pair<const MachineOperand *, unsigned>, 32> Masks;
+
+ /// Iterator for locations and the values they contain. Dereferencing
+ /// produces a struct/pair containing the LocIdx key for this location,
+ /// and a reference to the value currently stored. Simplifies the process
+ /// of seeking a particular location.
+ class MLocIterator {
+ LocToValueType &ValueMap;
+ LocIdx Idx;
+
+ public:
+ class value_type {
+ public:
+ value_type(LocIdx Idx, ValueIDNum &Value) : Idx(Idx), Value(Value) { }
+ const LocIdx Idx; /// Read-only index of this location.
+ ValueIDNum &Value; /// Reference to the stored value at this location.
+ };
+
+ MLocIterator(LocToValueType &ValueMap, LocIdx Idx)
+ : ValueMap(ValueMap), Idx(Idx) { }
+
+ bool operator==(const MLocIterator &Other) const {
+ assert(&ValueMap == &Other.ValueMap);
+ return Idx == Other.Idx;
+ }
+
+ bool operator!=(const MLocIterator &Other) const {
+ return !(*this == Other);
+ }
+
+ void operator++() {
+ Idx = LocIdx(Idx.asU64() + 1);
+ }
+
+ value_type operator*() {
+ return value_type(Idx, ValueMap[LocIdx(Idx)]);
+ }
+ };
+
+ MLocTracker(MachineFunction &MF, const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI, const TargetLowering &TLI)
+ : MF(MF), TII(TII), TRI(TRI), TLI(TLI),
+ LocIdxToIDNum(ValueIDNum::EmptyValue),
+ LocIdxToLocID(0) {
+ NumRegs = TRI.getNumRegs();
+ reset();
+ LocIDToLocIdx.resize(NumRegs, LocIdx::MakeIllegalLoc());
+ assert(NumRegs < (1u << NUM_LOC_BITS)); // Detect bit packing failure
+
+ // Always track SP. This prevents the implicit clobbering caused by
+ // regmasks from affecting its value. (LiveDebugValues disbelieves calls and
+ // regmasks that claim to clobber SP).
+ Register SP = TLI.getStackPointerRegisterToSaveRestore();
+ if (SP) {
+ unsigned ID = getLocID(SP, false);
+ (void)lookupOrTrackRegister(ID);
+ }
+ }
+
+ /// Produce location ID number for indexing LocIDToLocIdx. Takes the register
+ /// or spill number, and flag for whether it's a spill or not.
+ unsigned getLocID(Register RegOrSpill, bool isSpill) {
+ return (isSpill) ? RegOrSpill.id() + NumRegs - 1 : RegOrSpill.id();
+ }
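+
+ /// Worked example (illustrative only; the numbers are invented): with
+ /// NumRegs == 100, register locations keep their register number as their
+ /// LocID, while spill slots (1-based indices into SpillLocs) follow on
+ /// after the register range:
+ ///   getLocID(Register(7), /*isSpill=*/false) == 7
+ ///   getLocID(Register(1), /*isSpill=*/true)  == 100 // First spill slot.
+ ///   getLocID(Register(2), /*isSpill=*/true)  == 101 // Second spill slot.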
+
+ /// Accessor for reading the value at Idx.
+ ValueIDNum getNumAtPos(LocIdx Idx) const {
+ assert(Idx.asU64() < LocIdxToIDNum.size());
+ return LocIdxToIDNum[Idx];
+ }
+
+ unsigned getNumLocs(void) const { return LocIdxToIDNum.size(); }
+
+ /// Reset all locations to contain a PHI value at the designated block. Used
+ /// sometimes for actual PHI values, other times to indicate the block entry
+ /// value (before any more information is known).
+ void setMPhis(unsigned NewCurBB) {
+ CurBB = NewCurBB;
+ for (auto Location : locations())
+ Location.Value = {CurBB, 0, Location.Idx};
+ }
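+
+ /// For illustration: after setMPhis(4), a tracked location with LocIdx L
+ /// reads back as the value {4, 0, L}, i.e. "whatever was live into block 4
+ /// in location L" -- an instruction number of zero denotes a live-in / PHI
+ /// value here.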
+
+ /// Load values for each location from array of ValueIDNums. Take current
+ /// bbnum just in case we read a value from a hitherto untouched register.
+ void loadFromArray(ValueIDNum *Locs, unsigned NewCurBB) {
+ CurBB = NewCurBB;
+ // Iterate over all tracked locations, and load each location's live-in
+ // value into our local index.
+ for (auto Location : locations())
+ Location.Value = Locs[Location.Idx.asU64()];
+ }
+
+ /// Wipe any unnecessary location records after traversing a block.
+ void reset(void) {
+ // We could reset all the location values too; however either loadFromArray
+ // or setMPhis should be called before this object is re-used. Just
+ // clear Masks, they're definitely not needed.
+ Masks.clear();
+ }
+
+ /// Clear all data. Destroys the LocID <=> LocIdx map, which makes most of
+ /// the information in this pass uninterpretable.
+ void clear(void) {
+ reset();
+ LocIDToLocIdx.clear();
+ LocIdxToLocID.clear();
+ LocIdxToIDNum.clear();
+ //SpillLocs.reset(); XXX UniqueVector::reset assumes a SpillLoc casts from 0
+ SpillLocs = decltype(SpillLocs)();
+
+ LocIDToLocIdx.resize(NumRegs, LocIdx::MakeIllegalLoc());
+ }
+
+ /// Set a location to a certain value.
+ void setMLoc(LocIdx L, ValueIDNum Num) {
+ assert(L.asU64() < LocIdxToIDNum.size());
+ LocIdxToIDNum[L] = Num;
+ }
+
+ /// Create a LocIdx for an untracked register ID. Initialize it to either an
+ /// mphi value representing a live-in, or a recent register mask clobber.
+ LocIdx trackRegister(unsigned ID) {
+ assert(ID != 0);
+ LocIdx NewIdx = LocIdx(LocIdxToIDNum.size());
+ LocIdxToIDNum.grow(NewIdx);
+ LocIdxToLocID.grow(NewIdx);
+
+ // Default: it's an mphi.
+ ValueIDNum ValNum = {CurBB, 0, NewIdx};
+ // Was this reg ever touched by a regmask?
+ for (const auto &MaskPair : reverse(Masks)) {
+ if (MaskPair.first->clobbersPhysReg(ID)) {
+ // There was an earlier def we skipped.
+ ValNum = {CurBB, MaskPair.second, NewIdx};
+ break;
+ }
+ }
+
+ LocIdxToIDNum[NewIdx] = ValNum;
+ LocIdxToLocID[NewIdx] = ID;
+ return NewIdx;
+ }
+
+ LocIdx lookupOrTrackRegister(unsigned ID) {
+ LocIdx &Index = LocIDToLocIdx[ID];
+ if (Index.isIllegal())
+ Index = trackRegister(ID);
+ return Index;
+ }
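+
+ /// Hedged usage sketch (mirrors defReg / readReg below): callers translate
+ /// a register to its LocIdx before touching the value map, e.g.
+ ///   unsigned ID = getLocID(Reg, /*isSpill=*/false);
+ ///   LocIdx Idx = lookupOrTrackRegister(ID);
+ ///   ValueIDNum Val = LocIdxToIDNum[Idx];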
+
+ /// Record a definition of the specified register at the given block / inst.
+ /// This doesn't take a ValueIDNum, because the definition and its location
+ /// are synonymous.
+ void defReg(Register R, unsigned BB, unsigned Inst) {
+ unsigned ID = getLocID(R, false);
+ LocIdx Idx = lookupOrTrackRegister(ID);
+ ValueIDNum ValueID = {BB, Inst, Idx};
+ LocIdxToIDNum[Idx] = ValueID;
+ }
+
+ /// Set a register to a value number. To be used if the value number is
+ /// known in advance.
+ void setReg(Register R, ValueIDNum ValueID) {
+ unsigned ID = getLocID(R, false);
+ LocIdx Idx = lookupOrTrackRegister(ID);
+ LocIdxToIDNum[Idx] = ValueID;
+ }
+
+ ValueIDNum readReg(Register R) {
+ unsigned ID = getLocID(R, false);
+ LocIdx Idx = lookupOrTrackRegister(ID);
+ return LocIdxToIDNum[Idx];
+ }
+
+ /// Reset a register value to zero / empty. Needed to replicate the
+ /// VarLoc implementation where a copy to/from a register effectively
+ /// clears the contents of the source register. (Values can only have one
+ /// machine location in VarLocBasedImpl).
+ void wipeRegister(Register R) {
+ unsigned ID = getLocID(R, false);
+ LocIdx Idx = LocIDToLocIdx[ID];
+ LocIdxToIDNum[Idx] = ValueIDNum::EmptyValue;
+ }
+
+ /// Determine the LocIdx of an existing register.
+ LocIdx getRegMLoc(Register R) {
+ unsigned ID = getLocID(R, false);
+ return LocIDToLocIdx[ID];
+ }
+
+ /// Record a RegMask operand being executed. Defs any register we currently
+ /// track, stores a pointer to the mask in case we have to account for it
+ /// later.
+ void writeRegMask(const MachineOperand *MO, unsigned CurBB, unsigned InstID) {
+ // Fetch the stack pointer, so that the loop below can avoid clobbering it.
+ Register SP = TLI.getStackPointerRegisterToSaveRestore();
+
+ // Def any register we currently track that isn't preserved. The regmask
+ // terminates the liveness of a register, meaning its value can't be
+ // relied upon -- we represent this by giving it a new value.
+ for (auto Location : locations()) {
+ unsigned ID = LocIdxToLocID[Location.Idx];
+ // Don't clobber SP, even if the mask says it's clobbered.
+ if (ID < NumRegs && ID != SP && MO->clobbersPhysReg(ID))
+ defReg(ID, CurBB, InstID);
+ }
+ Masks.push_back(std::make_pair(MO, InstID));
+ }
+
+ /// Find LocIdx for SpillLoc \p L, creating a new one if it's not tracked.
+ LocIdx getOrTrackSpillLoc(SpillLoc L) {
+ unsigned SpillID = SpillLocs.idFor(L);
+ if (SpillID == 0) {
+ SpillID = SpillLocs.insert(L);
+ unsigned LocID = getLocID(SpillID, true);
+ LocIdx Idx = LocIdx(LocIdxToIDNum.size()); // New idx.
+ LocIdxToIDNum.grow(Idx);
+ LocIdxToLocID.grow(Idx);
+ LocIDToLocIdx.push_back(Idx);
+ LocIdxToLocID[Idx] = LocID;
+ return Idx;
+ } else {
+ unsigned LocID = getLocID(SpillID, true);
+ LocIdx Idx = LocIDToLocIdx[LocID];
+ return Idx;
+ }
+ }
+
+ /// Set the value stored in a spill slot.
+ void setSpill(SpillLoc L, ValueIDNum ValueID) {
+ LocIdx Idx = getOrTrackSpillLoc(L);
+ LocIdxToIDNum[Idx] = ValueID;
+ }
+
+ /// Read whatever value is in a spill slot, or None if it isn't tracked.
+ Optional<ValueIDNum> readSpill(SpillLoc L) {
+ unsigned SpillID = SpillLocs.idFor(L);
+ if (SpillID == 0)
+ return None;
+
+ unsigned LocID = getLocID(SpillID, true);
+ LocIdx Idx = LocIDToLocIdx[LocID];
+ return LocIdxToIDNum[Idx];
+ }
+
+ /// Determine the LocIdx of a spill slot. Return None if it previously
+ /// hasn't had a value assigned.
+ Optional<LocIdx> getSpillMLoc(SpillLoc L) {
+ unsigned SpillID = SpillLocs.idFor(L);
+ if (SpillID == 0)
+ return None;
+ unsigned LocNo = getLocID(SpillID, true);
+ return LocIDToLocIdx[LocNo];
+ }
+
+ /// Return true if Idx is a spill machine location.
+ bool isSpill(LocIdx Idx) const {
+ return LocIdxToLocID[Idx] >= NumRegs;
+ }
+
+ MLocIterator begin() {
+ return MLocIterator(LocIdxToIDNum, 0);
+ }
+
+ MLocIterator end() {
+ return MLocIterator(LocIdxToIDNum, LocIdxToIDNum.size());
+ }
+
+ /// Return a range over all locations currently tracked.
+ iterator_range<MLocIterator> locations() {
+ return llvm::make_range(begin(), end());
+ }
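+
+ /// Example (a minimal sketch of the iteration pattern used throughout this
+ /// file): wiping every tracked location back to a block live-in value:
+ ///   for (auto Location : locations())
+ ///     Location.Value = {CurBB, 0, Location.Idx};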
+
+ std::string LocIdxToName(LocIdx Idx) const {
+ unsigned ID = LocIdxToLocID[Idx];
+ if (ID >= NumRegs)
+ return Twine("slot ").concat(Twine(ID - NumRegs)).str();
+ else
+ return TRI.getRegAsmName(ID).str();
+ }
+
+ std::string IDAsString(const ValueIDNum &Num) const {
+ std::string DefName = LocIdxToName(Num.getLoc());
+ return Num.asString(DefName);
+ }
+
+ LLVM_DUMP_METHOD
+ void dump() {
+ for (auto Location : locations()) {
+ std::string MLocName = LocIdxToName(Location.Value.getLoc());
+ std::string DefName = Location.Value.asString(MLocName);
+ dbgs() << LocIdxToName(Location.Idx) << " --> " << DefName << "\n";
+ }
+ }
+
+ LLVM_DUMP_METHOD
+ void dump_mloc_map() {
+ for (auto Location : locations()) {
+ std::string Name = LocIdxToName(Location.Idx);
+ dbgs() << "Idx " << Location.Idx.asU64() << " " << Name << "\n";
+ }
+ }
+
+ /// Create a DBG_VALUE based on machine location \p MLoc. Qualify it with the
+ /// information in \p Properties, for variable \p Var. Don't insert it
+ /// anywhere, just return the builder for it.
+ MachineInstrBuilder emitLoc(Optional<LocIdx> MLoc, const DebugVariable &Var,
+ const DbgValueProperties &Properties) {
+ DebugLoc DL = DILocation::get(Var.getVariable()->getContext(), 0, 0,
+ Var.getVariable()->getScope(),
+ const_cast<DILocation *>(Var.getInlinedAt()));
+ auto MIB = BuildMI(MF, DL, TII.get(TargetOpcode::DBG_VALUE));
+
+ const DIExpression *Expr = Properties.DIExpr;
+ if (!MLoc) {
+ // No location -> DBG_VALUE $noreg
+ MIB.addReg(0, RegState::Debug);
+ MIB.addReg(0, RegState::Debug);
+ } else if (LocIdxToLocID[*MLoc] >= NumRegs) {
+ unsigned LocID = LocIdxToLocID[*MLoc];
+ const SpillLoc &Spill = SpillLocs[LocID - NumRegs + 1];
+
+ auto *TRI = MF.getSubtarget().getRegisterInfo();
+ Expr = TRI->prependOffsetExpression(Expr, DIExpression::ApplyOffset,
+ Spill.SpillOffset);
+ unsigned Base = Spill.SpillBase;
+ MIB.addReg(Base, RegState::Debug);
+ MIB.addImm(0);
+ } else {
+ unsigned LocID = LocIdxToLocID[*MLoc];
+ MIB.addReg(LocID, RegState::Debug);
+ if (Properties.Indirect)
+ MIB.addImm(0);
+ else
+ MIB.addReg(0, RegState::Debug);
+ }
+
+ MIB.addMetadata(Var.getVariable());
+ MIB.addMetadata(Expr);
+ return MIB;
+ }
+};
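+
+// For reference, a sketch of the three DBG_VALUE shapes that emitLoc() above
+// can produce (operand layout only; the register names are invented):
+//   DBG_VALUE $noreg, $noreg, !var, !expr   // No location known.
+//   DBG_VALUE $rbp, 0, !var, !expr'         // Spill slot: indirect, with the
+//                                           // slot offset folded into expr'.
+//   DBG_VALUE $rax, $noreg, !var, !expr     // Register location, direct.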
+
+/// Class recording the (high level) _value_ of a variable. Identifies either
+/// the value of the variable as a ValueIDNum, or a constant MachineOperand.
+/// This class also stores meta-information about how the value is qualified.
+/// Used to reason about variable values when performing the second
+/// (DebugVariable specific) dataflow analysis.
+class DbgValue {
+public:
+ union {
+ /// If Kind is Def, the value number that this value is based on.
+ ValueIDNum ID;
+ /// If Kind is Const, the MachineOperand defining this value.
+ MachineOperand MO;
+ /// For a NoVal DbgValue, which block it was generated in.
+ unsigned BlockNo;
+ };
+ /// Qualifiers for the ValueIDNum above.
+ DbgValueProperties Properties;
+
+ typedef enum {
+ Undef, // Represents a DBG_VALUE $noreg in the transfer function only.
+ Def, // This value is defined by an inst, or is a PHI value.
+ Const, // A constant value contained in the MachineOperand field.
+ Proposed, // This is a tentative PHI value, which may be confirmed or
+ // invalidated later.
+ NoVal // Empty DbgValue, generated during dataflow. BlockNo stores
+ // which block this was generated in.
+ } KindT;
+ /// Discriminator for whether this is a constant or an in-program value.
+ KindT Kind;
+
+ DbgValue(const ValueIDNum &Val, const DbgValueProperties &Prop, KindT Kind)
+ : ID(Val), Properties(Prop), Kind(Kind) {
+ assert(Kind == Def || Kind == Proposed);
+ }
+
+ DbgValue(unsigned BlockNo, const DbgValueProperties &Prop, KindT Kind)
+ : BlockNo(BlockNo), Properties(Prop), Kind(Kind) {
+ assert(Kind == NoVal);
+ }
+
+ DbgValue(const MachineOperand &MO, const DbgValueProperties &Prop, KindT Kind)
+ : MO(MO), Properties(Prop), Kind(Kind) {
+ assert(Kind == Const);
+ }
+
+ DbgValue(const DbgValueProperties &Prop, KindT Kind)
+ : Properties(Prop), Kind(Kind) {
+ assert(Kind == Undef &&
+ "Empty DbgValue constructor must pass in Undef kind");
+ }
+
+ void dump(const MLocTracker *MTrack) const {
+ if (Kind == Const) {
+ MO.dump();
+ } else if (Kind == NoVal) {
+ dbgs() << "NoVal(" << BlockNo << ")";
+ } else if (Kind == Proposed) {
+ dbgs() << "VPHI(" << MTrack->IDAsString(ID) << ")";
+ } else {
+ assert(Kind == Def);
+ dbgs() << MTrack->IDAsString(ID);
+ }
+ if (Properties.Indirect)
+ dbgs() << " indir";
+ if (Properties.DIExpr)
+ dbgs() << " " << *Properties.DIExpr;
+ }
+
+ bool operator==(const DbgValue &Other) const {
+ if (std::tie(Kind, Properties) != std::tie(Other.Kind, Other.Properties))
+ return false;
+ else if (Kind == Proposed && ID != Other.ID)
+ return false;
+ else if (Kind == Def && ID != Other.ID)
+ return false;
+ else if (Kind == NoVal && BlockNo != Other.BlockNo)
+ return false;
+ else if (Kind == Const)
+ return MO.isIdenticalTo(Other.MO);
+
+ return true;
+ }
+
+ bool operator!=(const DbgValue &Other) const { return !(*this == Other); }
+};
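+
+// Hedged construction examples for the discriminated union above (the
+// operand values and names are invented for illustration):
+//   DbgValue Live(SomeValueIDNum, Props, DbgValue::Def);     // Uses ID.
+//   DbgValue Imm(MachineOperand::CreateImm(0), Props, DbgValue::Const);
+//   DbgValue Missing(/*BlockNo=*/3, Props, DbgValue::NoVal); // Uses BlockNo.
+//   DbgValue Empty(Props, DbgValue::Undef);                  // $noreg.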
+
+/// Types for recording sets of variable fragments that overlap. For a given
+/// local variable, we record all other fragments of that variable that could
+/// overlap it, to reduce search time.
+using FragmentOfVar =
+ std::pair<const DILocalVariable *, DIExpression::FragmentInfo>;
+using OverlapMap =
+ DenseMap<FragmentOfVar, SmallVector<DIExpression::FragmentInfo, 1>>;
+
+/// Collection of DBG_VALUEs observed when traversing a block. Records each
+/// variable and the value the DBG_VALUE refers to. Requires the machine value
+/// location dataflow algorithm to have run already, so that values can be
+/// identified.
+class VLocTracker {
+public:
+ /// Map DebugVariable to the latest Value it's defined to have.
+ /// Needs to be a MapVector because we determine order-in-the-input-MIR from
+ /// the order in this container.
+ /// We only retain the last DbgValue in each block for each variable, to
+ /// determine the block's live-out variable value. The Vars container forms the
+ /// transfer function for this block, as part of the dataflow analysis. The
+ /// movement of values between locations inside of a block is handled at a
+ /// much later stage, in the TransferTracker class.
+ MapVector<DebugVariable, DbgValue> Vars;
+ DenseMap<DebugVariable, const DILocation *> Scopes;
+ MachineBasicBlock *MBB;
+
+public:
+ VLocTracker() {}
+
+ void defVar(const MachineInstr &MI, const DbgValueProperties &Properties,
+ Optional<ValueIDNum> ID) {
+ assert(MI.isDebugValue() || MI.isDebugRef());
+ DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt());
+ DbgValue Rec = (ID) ? DbgValue(*ID, Properties, DbgValue::Def)
+ : DbgValue(Properties, DbgValue::Undef);
+
+ // Attempt insertion; overwrite if it's already mapped.
+ auto Result = Vars.insert(std::make_pair(Var, Rec));
+ if (!Result.second)
+ Result.first->second = Rec;
+ Scopes[Var] = MI.getDebugLoc().get();
+ }
+
+ void defVar(const MachineInstr &MI, const MachineOperand &MO) {
+ // Only DBG_VALUEs can define constant-valued variables.
+ assert(MI.isDebugValue());
+ DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt());
+ DbgValueProperties Properties(MI);
+ DbgValue Rec = DbgValue(MO, Properties, DbgValue::Const);
+
+ // Attempt insertion; overwrite if it's already mapped.
+ auto Result = Vars.insert(std::make_pair(Var, Rec));
+ if (!Result.second)
+ Result.first->second = Rec;
+ Scopes[Var] = MI.getDebugLoc().get();
+ }
+};
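+
+// For illustration: if a block contains "DBG_VALUE $rax, ..., !x" followed by
+// "DBG_VALUE $rbx, ..., !x", only the second record survives in Vars; it
+// becomes x's entry in the block's transfer function, and thus its candidate
+// live-out value.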
+
+/// Tracker for converting machine value locations and variable values into
+/// variable locations (the output of LiveDebugValues), recorded as DBG_VALUEs
+/// specifying block live-in locations and transfers within blocks.
+///
+/// Operating on a per-block basis, this class takes a (pre-loaded) MLocTracker
+/// and must be initialized with the set of variable values that are live-in to
+/// the block. The caller then repeatedly calls process(). TransferTracker picks
+/// out variable locations for the live-in variable values (if there _is_ a
+/// location) and creates the corresponding DBG_VALUEs. Then, as the block is
+/// stepped through, transfers of values between machine locations are
+/// identified and if profitable, a DBG_VALUE created.
+///
+/// This is where debug use-before-defs would be resolved: a variable with an
+/// unavailable value could materialize in the middle of a block, when the
+/// value becomes available. Or, we could detect clobbers and re-specify the
+/// variable in a backup location. (XXX these are unimplemented).
+class TransferTracker {
+public:
+ const TargetInstrInfo *TII;
+ /// This machine location tracker is assumed to always contain the up-to-date
+ /// value mapping for all machine locations. TransferTracker only reads
+ /// information from it. (XXX make it const?)
+ MLocTracker *MTracker;
+ MachineFunction &MF;
+
+ /// Record of all changes in variable locations at a block position. Awkwardly
+ /// we allow inserting either before or after the point: MBB != nullptr
+ /// indicates it's before, otherwise after.
+ struct Transfer {
+ MachineBasicBlock::iterator Pos; /// Position to insert DBG_VALUEs.
+ MachineBasicBlock *MBB; /// Non-null if we insert before Pos, null to insert after.
+ SmallVector<MachineInstr *, 4> Insts; /// Vector of DBG_VALUEs to insert.
+ };
+
+ typedef struct {
+ LocIdx Loc;
+ DbgValueProperties Properties;
+ } LocAndProperties;
+
+ /// Collection of transfers (DBG_VALUEs) to be inserted.
+ SmallVector<Transfer, 32> Transfers;
+
+ /// Local cache of what-value-is-in-what-LocIdx. Used to identify differences
+ /// between TransferTracker's view of variable locations and MLocTracker's. For
+ /// example, MLocTracker observes all clobbers, but TransferTracker lazily
+ /// does not.
+ std::vector<ValueIDNum> VarLocs;
+
+ /// Map from LocIdxes to which DebugVariables are based on that location.
+ /// Maintained while stepping through the block. Not accurate if
+ /// VarLocs[Idx] != MTracker->LocIdxToIDNum[Idx].
+ std::map<LocIdx, SmallSet<DebugVariable, 4>> ActiveMLocs;
+
+ /// Map from DebugVariable to its current location and qualifying meta
+ /// information. To be used in conjunction with ActiveMLocs to construct
+ /// enough information for the DBG_VALUEs for a particular LocIdx.
+ DenseMap<DebugVariable, LocAndProperties> ActiveVLocs;
+
+ /// Temporary cache of DBG_VALUEs to be entered into the Transfers collection.
+ SmallVector<MachineInstr *, 4> PendingDbgValues;
+
+ /// Record of a use-before-def: created when a value that's live-in to the
+ /// current block isn't available in any machine location, but it will be
+ /// defined in this block.
+ struct UseBeforeDef {
+ /// Value of this variable, def'd in block.
+ ValueIDNum ID;
+ /// Identity of this variable.
+ DebugVariable Var;
+ /// Additional variable properties.
+ DbgValueProperties Properties;
+ };
+
+ /// Map from instruction index (within the block) to the set of UseBeforeDefs
+ /// that become defined at that instruction.
+ DenseMap<unsigned, SmallVector<UseBeforeDef, 1>> UseBeforeDefs;
+
+ /// The set of variables that are in UseBeforeDefs and can become a location
+ /// once the relevant value is defined. An element being erased from this
+ /// collection prevents the use-before-def materializing.
+ DenseSet<DebugVariable> UseBeforeDefVariables;
+
+ const TargetRegisterInfo &TRI;
+ const BitVector &CalleeSavedRegs;
+
+ TransferTracker(const TargetInstrInfo *TII, MLocTracker *MTracker,
+ MachineFunction &MF, const TargetRegisterInfo &TRI,
+ const BitVector &CalleeSavedRegs)
+ : TII(TII), MTracker(MTracker), MF(MF), TRI(TRI),
+ CalleeSavedRegs(CalleeSavedRegs) {}
+
+ /// Load object with live-in variable values. \p mlocs contains the live-in
+ /// values in each machine location, while \p vlocs the live-in variable
+ /// values. This method picks variable locations for the live-in variables,
+ /// creates DBG_VALUEs and puts them in #Transfers, then prepares the other
+ /// object fields to track variable locations as we step through the block.
+ /// FIXME: could just examine mloctracker instead of passing in \p mlocs?
+ void loadInlocs(MachineBasicBlock &MBB, ValueIDNum *MLocs,
+ SmallVectorImpl<std::pair<DebugVariable, DbgValue>> &VLocs,
+ unsigned NumLocs) {
+ ActiveMLocs.clear();
+ ActiveVLocs.clear();
+ VarLocs.clear();
+ VarLocs.reserve(NumLocs);
+ UseBeforeDefs.clear();
+ UseBeforeDefVariables.clear();
+
+ auto isCalleeSaved = [&](LocIdx L) {
+ unsigned Reg = MTracker->LocIdxToLocID[L];
+ if (Reg >= MTracker->NumRegs)
+ return false;
+ for (MCRegAliasIterator RAI(Reg, &TRI, true); RAI.isValid(); ++RAI)
+ if (CalleeSavedRegs.test(*RAI))
+ return true;
+ return false;
+ };
+
+ // Map of the preferred location for each value.
+ std::map<ValueIDNum, LocIdx> ValueToLoc;
+
+ // Produce a map of value numbers to the current machine locs they live
+ // in. When emulating VarLocBasedImpl, there should only be one
+ // location; when not, we get to pick.
+ for (auto Location : MTracker->locations()) {
+ LocIdx Idx = Location.Idx;
+ ValueIDNum &VNum = MLocs[Idx.asU64()];
+ VarLocs.push_back(VNum);
+ auto it = ValueToLoc.find(VNum);
+ // In order of preference, pick:
+ // * Callee saved registers,
+ // * Other registers,
+ // * Spill slots.
+ if (it == ValueToLoc.end() || MTracker->isSpill(it->second) ||
+ (!isCalleeSaved(it->second) && isCalleeSaved(Idx))) {
+ // Insert, or overwrite if insertion failed.
+ auto PrefLocRes = ValueToLoc.insert(std::make_pair(VNum, Idx));
+ if (!PrefLocRes.second)
+ PrefLocRes.first->second = Idx;
+ }
+ }
+
+ // Now map variables to their picked LocIdxes.
+ for (auto Var : VLocs) {
+ if (Var.second.Kind == DbgValue::Const) {
+ PendingDbgValues.push_back(
+ emitMOLoc(Var.second.MO, Var.first, Var.second.Properties));
+ continue;
+ }
+
+ // If the value has no location, we can't make a variable location.
+ const ValueIDNum &Num = Var.second.ID;
+ auto ValuesPreferredLoc = ValueToLoc.find(Num);
+ if (ValuesPreferredLoc == ValueToLoc.end()) {
+ // If it's a def that occurs in this block, register it as a
+ // use-before-def to be resolved as we step through the block.
+ if (Num.getBlock() == (unsigned)MBB.getNumber() && !Num.isPHI())
+ addUseBeforeDef(Var.first, Var.second.Properties, Num);
+ continue;
+ }
+
+ LocIdx M = ValuesPreferredLoc->second;
+ auto NewValue = LocAndProperties{M, Var.second.Properties};
+ auto Result = ActiveVLocs.insert(std::make_pair(Var.first, NewValue));
+ if (!Result.second)
+ Result.first->second = NewValue;
+ ActiveMLocs[M].insert(Var.first);
+ PendingDbgValues.push_back(
+ MTracker->emitLoc(M, Var.first, Var.second.Properties));
+ }
+ flushDbgValues(MBB.begin(), &MBB);
+ }
+
+ /// Record that \p Var has value \p ID, a value that becomes available
+ /// later in the function.
+ void addUseBeforeDef(const DebugVariable &Var,
+ const DbgValueProperties &Properties, ValueIDNum ID) {
+ UseBeforeDef UBD = {ID, Var, Properties};
+ UseBeforeDefs[ID.getInst()].push_back(UBD);
+ UseBeforeDefVariables.insert(Var);
+ }
+
+ /// After the instruction at index \p Inst and position \p pos has been
+ /// processed, check whether it defines a variable value in a use-before-def.
+ /// If so, and the variable value hasn't changed since the start of the
+ /// block, create a DBG_VALUE.
+ void checkInstForNewValues(unsigned Inst, MachineBasicBlock::iterator pos) {
+ auto MIt = UseBeforeDefs.find(Inst);
+ if (MIt == UseBeforeDefs.end())
+ return;
+
+ for (auto &Use : MIt->second) {
+ LocIdx L = Use.ID.getLoc();
+
+ // If something goes very wrong, we might end up labelling a COPY
+ // instruction or similar with an instruction number, even though it
+ // doesn't actually define a new value; it only moves a value. In case
+ // this happens, discard.
+ if (MTracker->LocIdxToIDNum[L] != Use.ID)
+ continue;
+
+ // If a different debug instruction defined the variable value / location
+ // since the start of the block, don't materialize this use-before-def.
+ if (!UseBeforeDefVariables.count(Use.Var))
+ continue;
+
+ PendingDbgValues.push_back(MTracker->emitLoc(L, Use.Var, Use.Properties));
+ }
+ flushDbgValues(pos, nullptr);
+ }
+
+ /// Helper to move created DBG_VALUEs into Transfers collection.
+ void flushDbgValues(MachineBasicBlock::iterator Pos, MachineBasicBlock *MBB) {
+ if (!PendingDbgValues.empty()) {
+ Transfers.push_back({Pos, MBB, PendingDbgValues});
+ PendingDbgValues.clear();
+ }
+ }
+
+ /// Change a variable value after encountering a DBG_VALUE inside a block.
+ void redefVar(const MachineInstr &MI) {
+ DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt());
+ DbgValueProperties Properties(MI);
+
+ const MachineOperand &MO = MI.getOperand(0);
+
+ // Ignore non-register locations, we don't transfer those.
+ if (!MO.isReg() || MO.getReg() == 0) {
+ auto It = ActiveVLocs.find(Var);
+ if (It != ActiveVLocs.end()) {
+ ActiveMLocs[It->second.Loc].erase(Var);
+ ActiveVLocs.erase(It);
+ }
+ // Any use-before-defs no longer apply.
+ UseBeforeDefVariables.erase(Var);
+ return;
+ }
+
+ Register Reg = MO.getReg();
+ LocIdx NewLoc = MTracker->getRegMLoc(Reg);
+ redefVar(MI, Properties, NewLoc);
+ }
+
+ /// Handle a change in variable location within a block. Terminate the
+ /// variable's current location, and record the value it now refers to, so
+ /// that we can detect location transfers later on.
+ void redefVar(const MachineInstr &MI, const DbgValueProperties &Properties,
+ Optional<LocIdx> OptNewLoc) {
+ DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt());
+ // Any use-before-defs no longer apply.
+ UseBeforeDefVariables.erase(Var);
+
+ // Erase any previous location.
+ auto It = ActiveVLocs.find(Var);
+ if (It != ActiveVLocs.end())
+ ActiveMLocs[It->second.Loc].erase(Var);
+
+ // If there _is_ no new location, all we had to do was erase.
+ if (!OptNewLoc)
+ return;
+ LocIdx NewLoc = *OptNewLoc;
+
+ // Check whether our local copy of values-by-location in #VarLocs is out of
+ // date. Wipe old tracking data for the location if it's been clobbered in
+ // the meantime.
+ if (MTracker->getNumAtPos(NewLoc) != VarLocs[NewLoc.asU64()]) {
+ for (auto &P : ActiveMLocs[NewLoc]) {
+ ActiveVLocs.erase(P);
+ }
+ ActiveMLocs[NewLoc].clear();
+ VarLocs[NewLoc.asU64()] = MTracker->getNumAtPos(NewLoc);
+ }
+
+ ActiveMLocs[NewLoc].insert(Var);
+ if (It == ActiveVLocs.end()) {
+ ActiveVLocs.insert(
+ std::make_pair(Var, LocAndProperties{NewLoc, Properties}));
+ } else {
+ It->second.Loc = NewLoc;
+ It->second.Properties = Properties;
+ }
+ }
+
+ /// Explicitly terminate variable locations based on \p MLoc. Creates undef
+ /// DBG_VALUEs for any variables that were located there, and clears
+ /// #ActiveMLocs / #ActiveVLocs tracking information for that location.
+ void clobberMloc(LocIdx MLoc, MachineBasicBlock::iterator Pos) {
+ assert(MTracker->isSpill(MLoc));
+ auto ActiveMLocIt = ActiveMLocs.find(MLoc);
+ if (ActiveMLocIt == ActiveMLocs.end())
+ return;
+
+ VarLocs[MLoc.asU64()] = ValueIDNum::EmptyValue;
+
+ for (auto &Var : ActiveMLocIt->second) {
+ auto ActiveVLocIt = ActiveVLocs.find(Var);
+ // Create an undef. We can't feed in a nullptr DIExpression alas,
+ // so use the variable's last expression. Pass None as the location.
+ const DIExpression *Expr = ActiveVLocIt->second.Properties.DIExpr;
+ DbgValueProperties Properties(Expr, false);
+ PendingDbgValues.push_back(MTracker->emitLoc(None, Var, Properties));
+ ActiveVLocs.erase(ActiveVLocIt);
+ }
+ flushDbgValues(Pos, nullptr);
+
+ ActiveMLocIt->second.clear();
+ }
+
+ /// Transfer variables based on \p Src to be based on \p Dst. This handles
+ /// both register copies as well as spills and restores. Creates DBG_VALUEs
+ /// describing the movement.
+ void transferMlocs(LocIdx Src, LocIdx Dst, MachineBasicBlock::iterator Pos) {
+ // Does Src still contain the value num we expect? If not, it's been
+ // clobbered in the meantime, and our variable locations are stale.
+ if (VarLocs[Src.asU64()] != MTracker->getNumAtPos(Src))
+ return;
+
+ // assert(ActiveMLocs[Dst].size() == 0);
+ //^^^ Legitimate scenario on account of un-clobbered slot being assigned to?
+ ActiveMLocs[Dst] = ActiveMLocs[Src];
+ VarLocs[Dst.asU64()] = VarLocs[Src.asU64()];
+
+ // For each variable based on Src; create a location at Dst.
+ for (auto &Var : ActiveMLocs[Src]) {
+ auto ActiveVLocIt = ActiveVLocs.find(Var);
+ assert(ActiveVLocIt != ActiveVLocs.end());
+ ActiveVLocIt->second.Loc = Dst;
+
+ assert(Dst != 0);
+ MachineInstr *MI =
+ MTracker->emitLoc(Dst, Var, ActiveVLocIt->second.Properties);
+ PendingDbgValues.push_back(MI);
+ }
+ ActiveMLocs[Src].clear();
+ flushDbgValues(Pos, nullptr);
+
+ // XXX XXX XXX "pretend to be old LDV" means dropping all tracking data
+ // about the old location.
+ if (EmulateOldLDV)
+ VarLocs[Src.asU64()] = ValueIDNum::EmptyValue;
+ }
+
+ MachineInstrBuilder emitMOLoc(const MachineOperand &MO,
+ const DebugVariable &Var,
+ const DbgValueProperties &Properties) {
+ DebugLoc DL = DILocation::get(Var.getVariable()->getContext(), 0, 0,
+ Var.getVariable()->getScope(),
+ const_cast<DILocation *>(Var.getInlinedAt()));
+ auto MIB = BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE));
+ MIB.add(MO);
+ if (Properties.Indirect)
+ MIB.addImm(0);
+ else
+ MIB.addReg(0);
+ MIB.addMetadata(Var.getVariable());
+ MIB.addMetadata(Properties.DIExpr);
+ return MIB;
+ }
+};
+
+class InstrRefBasedLDV : public LDVImpl {
+private:
+ using FragmentInfo = DIExpression::FragmentInfo;
+ using OptFragmentInfo = Optional<DIExpression::FragmentInfo>;
+
+ // Helper while building OverlapMap, a map of all fragments seen for a given
+ // DILocalVariable.
+ using VarToFragments =
+ DenseMap<const DILocalVariable *, SmallSet<FragmentInfo, 4>>;
+
+ /// Machine location/value transfer function, a mapping of which locations
+ /// are assigned which new values.
+ using MLocTransferMap = std::map<LocIdx, ValueIDNum>;
+
+ /// Live in/out structure for the variable values: a per-block map of
+ /// variables to their values. XXX, better name?
+ using LiveIdxT =
+ DenseMap<const MachineBasicBlock *, DenseMap<DebugVariable, DbgValue> *>;
+
+ using VarAndLoc = std::pair<DebugVariable, DbgValue>;
+
+ /// Type for a live-in value: the predecessor block, and its value.
+ using InValueT = std::pair<MachineBasicBlock *, DbgValue *>;
+
+ /// Vector (per block) of a collection (inner smallvector) of live-ins.
+ /// Used as the result type for the variable value dataflow problem.
+ using LiveInsT = SmallVector<SmallVector<VarAndLoc, 8>, 8>;
+
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+ const TargetFrameLowering *TFI;
+ BitVector CalleeSavedRegs;
+ LexicalScopes LS;
+ TargetPassConfig *TPC;
+
+ /// Object to track machine locations as we step through a block. Could
+ /// probably be a field rather than a pointer, as it's always used.
+ MLocTracker *MTracker;
+
+ /// Number of the current block LiveDebugValues is stepping through.
+ unsigned CurBB;
+
+ /// Number of the current instruction LiveDebugValues is evaluating.
+ unsigned CurInst;
+
+ /// Variable tracker -- listens to DBG_VALUEs occurring as InstrRefBasedImpl
+ /// steps through a block. Reads the values at each location from the
+ /// MLocTracker object.
+ VLocTracker *VTracker;
+
+ /// Tracker for transfers, listens to DBG_VALUEs and transfers of values
+ /// between locations during stepping, creates new DBG_VALUEs when values move
+ /// location.
+ TransferTracker *TTracker;
+
+ /// Blocks which are artificial, i.e. blocks which exclusively contain
+ /// instructions without DebugLocs, or with line 0 locations.
+ SmallPtrSet<const MachineBasicBlock *, 16> ArtificialBlocks;
+
+ // Mapping of blocks to and from their RPOT order.
+ DenseMap<unsigned int, MachineBasicBlock *> OrderToBB;
+ DenseMap<MachineBasicBlock *, unsigned int> BBToOrder;
+ DenseMap<unsigned, unsigned> BBNumToRPO;
+
+ /// Pair of MachineInstr, and its 1-based offset into the containing block.
+ using InstAndNum = std::pair<const MachineInstr *, unsigned>;
+ /// Map from debug instruction number to the MachineInstr labelled with that
+ /// number, and its location within the function. Used to transform
+ /// instruction numbers in DBG_INSTR_REFs into machine value numbers.
+ std::map<uint64_t, InstAndNum> DebugInstrNumToInstr;
+
+ // Map of overlapping variable fragments.
+ OverlapMap OverlapFragments;
+ VarToFragments SeenFragments;
+
+ /// Tests whether this instruction is a spill to a stack slot.
+ bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF);
+
+ /// Decide if \p MI is a spill instruction and return true if it is. We use 2
+ /// criteria to make this decision:
+ /// - Is this instruction a store to a spill slot?
+ /// - Is there a register operand that is both used and killed?
+ /// TODO: Store optimization can fold spills into other stores (including
+ /// other spills). We do not handle this yet (more than one memory operand).
+ bool isLocationSpill(const MachineInstr &MI, MachineFunction *MF,
+ unsigned &Reg);
+
+ /// If a given instruction is identified as a restore, return the spill slot
+ /// it restores from, and set \p Reg to the restored register.
+ Optional<SpillLoc> isRestoreInstruction(const MachineInstr &MI,
+ MachineFunction *MF, unsigned &Reg);
+
+ /// Given a spill instruction, extract the register and offset used to
+ /// address the spill slot in a target-independent way.
+ SpillLoc extractSpillBaseRegAndOffset(const MachineInstr &MI);
+
+ /// Observe a single instruction while stepping through a block.
+ void process(MachineInstr &MI);
+
+ /// Examines whether \p MI is a DBG_VALUE and notifies trackers.
+ /// \returns true if MI was recognized and processed.
+ bool transferDebugValue(const MachineInstr &MI);
+
+ /// Examines whether \p MI is a DBG_INSTR_REF and notifies trackers.
+ /// \returns true if MI was recognized and processed.
+ bool transferDebugInstrRef(MachineInstr &MI);
+
+ /// Examines whether \p MI is copy instruction, and notifies trackers.
+ /// \returns true if MI was recognized and processed.
+ bool transferRegisterCopy(MachineInstr &MI);
+
+ /// Examines whether \p MI is stack spill or restore instruction, and
+ /// notifies trackers. \returns true if MI was recognized and processed.
+ bool transferSpillOrRestoreInst(MachineInstr &MI);
+
+ /// Examines \p MI for any registers that it defines, and notifies trackers.
+ void transferRegisterDef(MachineInstr &MI);
+
+ /// Copy one location to the other, accounting for movement of subregisters
+ /// too.
+ void performCopy(Register Src, Register Dst);
+
+ void accumulateFragmentMap(MachineInstr &MI);
+
+ /// Step through the function, recording register definitions and movements
+ /// in an MLocTracker. Convert the observations into a per-block transfer
+ /// function in \p MLocTransfer, suitable for using with the machine value
+ /// location dataflow problem.
+ void
+ produceMLocTransferFunction(MachineFunction &MF,
+ SmallVectorImpl<MLocTransferMap> &MLocTransfer,
+ unsigned MaxNumBlocks);
+
+ /// Solve the machine value location dataflow problem. Takes as input the
+ /// transfer functions in \p MLocTransfer. Writes the output live-in and
+ /// live-out arrays to the (initialized to zero) multidimensional arrays in
+ /// \p MInLocs and \p MOutLocs. The outer dimension is indexed by block
+ /// number, the inner by LocIdx.
+ void mlocDataflow(ValueIDNum **MInLocs, ValueIDNum **MOutLocs,
+ SmallVectorImpl<MLocTransferMap> &MLocTransfer);
+
+ /// Perform a control flow join (lattice value meet) of the values in machine
+ /// locations at \p MBB. Follows the algorithm described in the file-comment,
+ /// reading live-outs of predecessors from \p OutLocs, the current live ins
+ /// from \p InLocs, and assigning the newly computed live ins back into
+ /// \p InLocs. \returns two bools -- the first indicates whether a change
+ /// was made, the second whether a lattice downgrade occurred. If the latter
+ /// is true, revisiting this block is necessary.
+ std::tuple<bool, bool>
+ mlocJoin(MachineBasicBlock &MBB,
+ SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
+ ValueIDNum **OutLocs, ValueIDNum *InLocs);
+
+ /// Solve the variable value dataflow problem, for a single lexical scope.
+ /// Uses the algorithm from the file comment to resolve control flow joins,
+ /// although there are extra hacks, see vlocJoin. Reads the
+ /// locations of values from the \p MInLocs and \p MOutLocs arrays (see
+ /// mlocDataflow) and reads the variable values transfer function from
+ /// \p AllTheVlocs. Live-in and Live-out variable values are stored locally,
+ /// with the live-ins permanently stored to \p Output once the fixedpoint is
+ /// reached.
+ /// \p VarsWeCareAbout contains a collection of the variables in \p Scope
+ /// that we should be tracking.
+ /// \p AssignBlocks contains the set of blocks that aren't in \p Scope, but
+ /// which do contain DBG_VALUEs, which VarLocBasedImpl tracks locations
+ /// through.
+ void vlocDataflow(const LexicalScope *Scope, const DILocation *DILoc,
+ const SmallSet<DebugVariable, 4> &VarsWeCareAbout,
+ SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks,
+ LiveInsT &Output, ValueIDNum **MOutLocs,
+ ValueIDNum **MInLocs,
+ SmallVectorImpl<VLocTracker> &AllTheVLocs);
+
+ /// Compute the live-ins to a block, considering control flow merges according
+ /// to the method in the file comment. Live out and live in variable values
+ /// are stored in \p VLOCOutLocs and \p VLOCInLocs. The live-ins for \p MBB
+ /// are computed and stored into \p VLOCInLocs.
+ /// \p InLocsT Output argument, storage for calculated live-ins.
+ /// \returns two bools -- the first indicates whether a change
+ /// was made, the second whether a lattice downgrade occurred. If the latter
+ /// is true, revisiting this block is necessary.
+ std::tuple<bool, bool>
+ vlocJoin(MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs, LiveIdxT &VLOCInLocs,
+ SmallPtrSet<const MachineBasicBlock *, 16> *VLOCVisited,
+ unsigned BBNum, const SmallSet<DebugVariable, 4> &AllVars,
+ ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
+ SmallPtrSet<const MachineBasicBlock *, 8> &InScopeBlocks,
+ SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore,
+ DenseMap<DebugVariable, DbgValue> &InLocsT);
+
+ /// Continue exploration of the variable-value lattice, as explained in the
+ /// file-level comment. \p OldLiveInLocation contains the current
+ /// exploration position, from which we need to descend further. \p Values
+ /// contains the set of live-in values, \p CurBlockRPONum the RPO number of
+ /// the current block, and \p CandidateLocations a set of locations that
+ /// should be considered as PHI locations, if we reach the bottom of the
+ /// lattice. \returns true if we should downgrade; the value is the agreeing
+ /// value number in a non-backedge predecessor.
+ bool vlocDowngradeLattice(const MachineBasicBlock &MBB,
+ const DbgValue &OldLiveInLocation,
+ const SmallVectorImpl<InValueT> &Values,
+ unsigned CurBlockRPONum);
+
+ /// For the given block and live-outs feeding into it, try to find a
+ /// machine location where they all join. If a solution for all predecessors
+ /// can't be found, a location where all non-backedge-predecessors join
+ /// will be returned instead. While this method finds a join location, this
+ /// says nothing as to whether it should be used.
+ /// \returns Pair of value ID if found, and true when the correct value
+ /// is available on all predecessor edges, or false if it's only available
+ /// for non-backedge predecessors.
+ std::tuple<Optional<ValueIDNum>, bool>
+ pickVPHILoc(MachineBasicBlock &MBB, const DebugVariable &Var,
+ const LiveIdxT &LiveOuts, ValueIDNum **MOutLocs,
+ ValueIDNum **MInLocs,
+ const SmallVectorImpl<MachineBasicBlock *> &BlockOrders);
+
+ /// Given the solutions to the two dataflow problems, machine value locations
+ /// in \p MInLocs and live-in variable values in \p SavedLiveIns, runs the
+ /// TransferTracker class over the function to produce live-in and transfer
+ /// DBG_VALUEs, then inserts them. Groups of DBG_VALUEs are inserted in the
+ /// order given by AllVarsNumbering -- this could be any stable order, but
+ /// right now "order of appearence in function, when explored in RPO", so
+ /// that we can compare explictly against VarLocBasedImpl.
+ void emitLocations(MachineFunction &MF, LiveInsT SavedLiveIns,
+ ValueIDNum **MInLocs,
+ DenseMap<DebugVariable, unsigned> &AllVarsNumbering);
+
+ /// Boilerplate computation of some initial sets, artificial blocks and
+ /// RPOT block ordering.
+ void initialSetup(MachineFunction &MF);
+
+ bool ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) override;
+
+public:
+ /// Default construct and initialize the pass.
+ InstrRefBasedLDV();
+
+ LLVM_DUMP_METHOD
+ void dump_mloc_transfer(const MLocTransferMap &mloc_transfer) const;
+
+ bool isCalleeSaved(LocIdx L) {
+ unsigned Reg = MTracker->LocIdxToLocID[L];
+ for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
+ if (CalleeSavedRegs.test(*RAI))
+ return true;
+ return false;
+ }
+};
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Implementation
+//===----------------------------------------------------------------------===//
+
+ValueIDNum ValueIDNum::EmptyValue = {UINT_MAX, UINT_MAX, UINT_MAX};
+
+/// Default construct and initialize the pass.
+InstrRefBasedLDV::InstrRefBasedLDV() {}
+
+//===----------------------------------------------------------------------===//
+// Debug Range Extension Implementation
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+// Something to restore in the future.
+// void InstrRefBasedLDV::printVarLocInMBB(..)
+#endif
+
+SpillLoc
+InstrRefBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
+ assert(MI.hasOneMemOperand() &&
+ "Spill instruction does not have exactly one memory operand?");
+ auto MMOI = MI.memoperands_begin();
+ const PseudoSourceValue *PVal = (*MMOI)->getPseudoValue();
+ assert(PVal->kind() == PseudoSourceValue::FixedStack &&
+ "Inconsistent memory operand in spill instruction");
+ int FI = cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex();
+ const MachineBasicBlock *MBB = MI.getParent();
+ Register Reg;
+ StackOffset Offset = TFI->getFrameIndexReference(*MBB->getParent(), FI, Reg);
+ return {Reg, Offset};
+}
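+
+// Hedged example (the x86 operands here are invented for illustration): for a
+// spill such as
+//   MOV64mr $rsp, 1, $noreg, 8, $noreg, killed $rax :: (store 8 into %stack.0)
+// this returns {$rsp, <offset of %stack.0 from $rsp>}, as computed by
+// TFI->getFrameIndexReference() for the frame index in the memory operand.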
+
+/// End all previous ranges related to \p MI and start a new range from \p MI
+/// if it is a DBG_VALUE instr.
+bool InstrRefBasedLDV::transferDebugValue(const MachineInstr &MI) {
+ if (!MI.isDebugValue())
+ return false;
+
+ const DILocalVariable *Var = MI.getDebugVariable();
+ const DIExpression *Expr = MI.getDebugExpression();
+ const DILocation *DebugLoc = MI.getDebugLoc();
+ const DILocation *InlinedAt = DebugLoc->getInlinedAt();
+ assert(Var->isValidLocationForIntrinsic(DebugLoc) &&
+ "Expected inlined-at fields to agree");
+
+ DebugVariable V(Var, Expr, InlinedAt);
+ DbgValueProperties Properties(MI);
+
+ // If there are no instructions in this lexical scope, do no location tracking
+ // at all; this variable shouldn't get a legitimate location range.
+ auto *Scope = LS.findLexicalScope(MI.getDebugLoc().get());
+ if (Scope == nullptr)
+ return true; // Handled it, by doing nothing.
+
+ const MachineOperand &MO = MI.getOperand(0);
+
+ // MLocTracker needs to know that this register is read, even if it's only
+ // read by a debug inst.
+ if (MO.isReg() && MO.getReg() != 0)
+ (void)MTracker->readReg(MO.getReg());
+
+ // If we're preparing for the second analysis (variables), the machine value
+ // locations are already solved, and we report this DBG_VALUE and the value
+ // it refers to to VLocTracker.
+ if (VTracker) {
+ if (MO.isReg()) {
+ // Feed defVar the new variable location, or if this is a
+ // DBG_VALUE $noreg, feed defVar None.
+ if (MO.getReg())
+ VTracker->defVar(MI, Properties, MTracker->readReg(MO.getReg()));
+ else
+ VTracker->defVar(MI, Properties, None);
+ } else if (MI.getOperand(0).isImm() || MI.getOperand(0).isFPImm() ||
+ MI.getOperand(0).isCImm()) {
+ VTracker->defVar(MI, MI.getOperand(0));
+ }
+ }
+
+ // If performing final tracking of transfers, report this variable definition
+ // to the TransferTracker too.
+ if (TTracker)
+ TTracker->redefVar(MI);
+ return true;
+}
+
+bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI) {
+ if (!MI.isDebugRef())
+ return false;
+
+ // Only handle this instruction when we are building the variable value
+ // transfer function.
+ if (!VTracker)
+ return false;
+
+ unsigned InstNo = MI.getOperand(0).getImm();
+ unsigned OpNo = MI.getOperand(1).getImm();
+
+ const DILocalVariable *Var = MI.getDebugVariable();
+ const DIExpression *Expr = MI.getDebugExpression();
+ const DILocation *DebugLoc = MI.getDebugLoc();
+ const DILocation *InlinedAt = DebugLoc->getInlinedAt();
+ assert(Var->isValidLocationForIntrinsic(DebugLoc) &&
+ "Expected inlined-at fields to agree");
+
+ DebugVariable V(Var, Expr, InlinedAt);
+
+ auto *Scope = LS.findLexicalScope(MI.getDebugLoc().get());
+ if (Scope == nullptr)
+ return true; // Handled by doing nothing. This variable is never in scope.
+
+ const MachineFunction &MF = *MI.getParent()->getParent();
+
+ // Various optimizations may have happened to the value during codegen,
+ // recorded in the value substitution table. Apply any substitutions to
+ // the instruction / operand number in this DBG_INSTR_REF.
+ auto Sub = MF.DebugValueSubstitutions.find(std::make_pair(InstNo, OpNo));
+ while (Sub != MF.DebugValueSubstitutions.end()) {
+ InstNo = Sub->second.first;
+ OpNo = Sub->second.second;
+ Sub = MF.DebugValueSubstitutions.find(std::make_pair(InstNo, OpNo));
+ }
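+
+ // For illustration: if the substitution table records (10, 0) -> (14, 0)
+ // and (14, 0) -> (20, 2), a DBG_INSTR_REF of instruction 10, operand 0
+ // resolves through the chain to instruction 20, operand 2.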
+
+ // Default machine value number is <None> -- if no instruction defines
+ // the corresponding value, it must have been optimized out.
+ Optional<ValueIDNum> NewID = None;
+
+ // Try to lookup the instruction number, and find the machine value number
+ // that it defines.
+ auto InstrIt = DebugInstrNumToInstr.find(InstNo);
+ if (InstrIt != DebugInstrNumToInstr.end()) {
+ const MachineInstr &TargetInstr = *InstrIt->second.first;
+ uint64_t BlockNo = TargetInstr.getParent()->getNumber();
+
+ // Pick out the designated operand.
+ assert(OpNo < TargetInstr.getNumOperands());
+ const MachineOperand &MO = TargetInstr.getOperand(OpNo);
+
+ // Today, this can only be a register.
+ assert(MO.isReg() && MO.isDef());
+
+ unsigned LocID = MTracker->getLocID(MO.getReg(), false);
+ LocIdx L = MTracker->LocIDToLocIdx[LocID];
+ NewID = ValueIDNum(BlockNo, InstrIt->second.second, L);
+ }
+
+ // Now we have a value number or None. Tell the variable value tracker about
+ // it. The rest of this LiveDebugValues implementation acts exactly the same
+ // for DBG_INSTR_REFs as DBG_VALUEs (just, the former can refer to values that
+ // aren't immediately available).
+ DbgValueProperties Properties(Expr, false);
+ VTracker->defVar(MI, Properties, NewID);
+
+ // If we're on the final pass through the function, decompose this INSTR_REF
+ // into a plain DBG_VALUE.
+ if (!TTracker)
+ return true;
+
+ // Pick a location for the machine value number, if such a location exists.
+ // (This information could be stored in TransferTracker to make it faster).
+ Optional<LocIdx> FoundLoc = None;
+ for (auto Location : MTracker->locations()) {
+ LocIdx CurL = Location.Idx;
+ ValueIDNum ID = MTracker->LocIdxToIDNum[CurL];
+ if (NewID && ID == NewID) {
+ // If this is the first location with that value, pick it. Otherwise,
+ // consider whether it's a "longer term" location.
+ if (!FoundLoc) {
+ FoundLoc = CurL;
+ continue;
+ }
+
+ if (MTracker->isSpill(CurL))
+ FoundLoc = CurL; // Spills are a longer term location.
+ else if (!MTracker->isSpill(*FoundLoc) &&
+ !MTracker->isSpill(CurL) &&
+ !isCalleeSaved(*FoundLoc) &&
+ isCalleeSaved(CurL))
+ FoundLoc = CurL; // Callee saved regs are longer term than normal.
+ }
+ }
+
+ // Tell transfer tracker that the variable value has changed.
+ TTracker->redefVar(MI, Properties, FoundLoc);
+
+ // If there was a value with no location; but the value is defined in a
+ // later instruction in this block, this is a block-local use-before-def.
+ if (!FoundLoc && NewID && NewID->getBlock() == CurBB &&
+ NewID->getInst() > CurInst)
+ TTracker->addUseBeforeDef(V, {MI.getDebugExpression(), false}, *NewID);
+
+ // Produce a DBG_VALUE representing what this DBG_INSTR_REF meant.
+ // This DBG_VALUE is potentially a $noreg / undefined location, if
+ // FoundLoc is None.
+ // (XXX -- could morph the DBG_INSTR_REF in the future).
+ MachineInstr *DbgMI = MTracker->emitLoc(FoundLoc, V, Properties);
+ TTracker->PendingDbgValues.push_back(DbgMI);
+ TTracker->flushDbgValues(MI.getIterator(), nullptr);
+
+ return true;
+}
+
+void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
+ // Meta Instructions do not affect the debug liveness of any register they
+ // define.
+ if (MI.isImplicitDef()) {
+ // Except when there's an implicit def, and the location it's defining has
+ // no value number. The whole point of an implicit def is to announce that
+ // the register is live, without being specific about its value. So define
+ // a value if there isn't one already.
+ ValueIDNum Num = MTracker->readReg(MI.getOperand(0).getReg());
+ // Has a legitimate value -> ignore the implicit def.
+ if (Num.getLoc() != 0)
+ return;
+ // Otherwise, def it here.
+ } else if (MI.isMetaInstruction())
+ return;
+
+ MachineFunction *MF = MI.getMF();
+ const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
+ Register SP = TLI->getStackPointerRegisterToSaveRestore();
+
+ // Find the regs killed by MI, and find regmasks of preserved regs.
+ // Max out the number of statically allocated elements in `DeadRegs`, as this
+ // prevents fallback to std::set::count() operations.
+ SmallSet<uint32_t, 32> DeadRegs;
+ SmallVector<const uint32_t *, 4> RegMasks;
+ SmallVector<const MachineOperand *, 4> RegMaskPtrs;
+ for (const MachineOperand &MO : MI.operands()) {
+ // Determine whether the operand is a register def.
+ if (MO.isReg() && MO.isDef() && MO.getReg() &&
+ Register::isPhysicalRegister(MO.getReg()) &&
+ !(MI.isCall() && MO.getReg() == SP)) {
+ // Remove ranges of all aliased registers.
+ for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
+ // FIXME: Can we break out of this loop early if no insertion occurs?
+ DeadRegs.insert(*RAI);
+ } else if (MO.isRegMask()) {
+ RegMasks.push_back(MO.getRegMask());
+ RegMaskPtrs.push_back(&MO);
+ }
+ }
+
+ // Tell MLocTracker about all definitions, of regmasks and otherwise.
+ for (uint32_t DeadReg : DeadRegs)
+ MTracker->defReg(DeadReg, CurBB, CurInst);
+
+ for (auto *MO : RegMaskPtrs)
+ MTracker->writeRegMask(MO, CurBB, CurInst);
+}
+
+void InstrRefBasedLDV::performCopy(Register SrcRegNum, Register DstRegNum) {
+ ValueIDNum SrcValue = MTracker->readReg(SrcRegNum);
+
+ MTracker->setReg(DstRegNum, SrcValue);
+
+ // In all circumstances, re-def the super registers. It's definitely a new
+ // value now. This doesn't uniquely identify the composition of subregs, for
+ // example, two identical values in subregisters composed in different
+ // places would not get equal value numbers.
+ for (MCSuperRegIterator SRI(DstRegNum, TRI); SRI.isValid(); ++SRI)
+ MTracker->defReg(*SRI, CurBB, CurInst);
+
+ // If we're emulating VarLocBasedImpl, just define all the subregisters.
+ // DBG_VALUEs of them will expect to be tracked from the DBG_VALUE, not
+ // through prior copies.
+ if (EmulateOldLDV) {
+ for (MCSubRegIndexIterator DRI(DstRegNum, TRI); DRI.isValid(); ++DRI)
+ MTracker->defReg(DRI.getSubReg(), CurBB, CurInst);
+ return;
+ }
+
+ // Otherwise, actually copy subregisters from one location to another.
+ // XXX: in addition, any subregisters of DstRegNum that don't line up with
+ // the source register should be def'd.
+ for (MCSubRegIndexIterator SRI(SrcRegNum, TRI); SRI.isValid(); ++SRI) {
+ unsigned SrcSubReg = SRI.getSubReg();
+ unsigned SubRegIdx = SRI.getSubRegIndex();
+ unsigned DstSubReg = TRI->getSubReg(DstRegNum, SubRegIdx);
+ if (!DstSubReg)
+ continue;
+
+ // Do copy. There are two matching subregisters, the source value should
+ // have been def'd when the super-reg was, the latter might not be tracked
+ // yet.
+ // This will force SrcSubReg to be tracked, if it isn't yet.
+ (void)MTracker->readReg(SrcSubReg);
+ LocIdx SrcL = MTracker->getRegMLoc(SrcSubReg);
+ assert(SrcL.asU64());
+ (void)MTracker->readReg(DstSubReg);
+ LocIdx DstL = MTracker->getRegMLoc(DstSubReg);
+ assert(DstL.asU64());
+ (void)DstL;
+ ValueIDNum CpyValue = {SrcValue.getBlock(), SrcValue.getInst(), SrcL};
+
+ MTracker->setReg(DstSubReg, CpyValue);
+ }
+}
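+
+// Hedged x86 example (register names are for illustration only): performCopy
+// for $eax -> $ebx gives $ebx the value number read from $eax, re-defs $rbx
+// (the super-register now holds a new composite value), and copies the value
+// of each matching subregister pair, e.g. $ax into $bx.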
+
+bool InstrRefBasedLDV::isSpillInstruction(const MachineInstr &MI,
+ MachineFunction *MF) {
+ // TODO: Handle multiple stores folded into one.
+ if (!MI.hasOneMemOperand())
+ return false;
+
+ if (!MI.getSpillSize(TII) && !MI.getFoldedSpillSize(TII))
+ return false; // This is not a spill instruction, since no valid size was
+ // returned from either function.
+
+ return true;
+}
+
+bool InstrRefBasedLDV::isLocationSpill(const MachineInstr &MI,
+ MachineFunction *MF, unsigned &Reg) {
+ if (!isSpillInstruction(MI, MF))
+ return false;
+
+ // XXX FIXME: On x86, isStoreToStackSlotPostFE returns '1' instead of an
+ // actual register number.
+ if (ObserveAllStackops) {
+ int FI;
+ Reg = TII->isStoreToStackSlotPostFE(MI, FI);
+ return Reg != 0;
+ }
+
+ auto isKilledReg = [&](const MachineOperand &MO, unsigned &Reg) {
+ if (!MO.isReg() || !MO.isUse()) {
+ Reg = 0;
+ return false;
+ }
+ Reg = MO.getReg();
+ return MO.isKill();
+ };
+
+ for (const MachineOperand &MO : MI.operands()) {
+ // In a spill instruction generated by the InlineSpiller the spilled
+ // register has its kill flag set.
+ if (isKilledReg(MO, Reg))
+ return true;
+ if (Reg != 0) {
+ // Check whether next instruction kills the spilled register.
+ // FIXME: Current solution does not cover search for killed register in
+ // bundles and instructions further down the chain.
+ auto NextI = std::next(MI.getIterator());
+ // Skip next instruction that points to basic block end iterator.
+ if (MI.getParent()->end() == NextI)
+ continue;
+ unsigned RegNext;
+ for (const MachineOperand &MONext : NextI->operands()) {
+ // Return true if we came across the register from the
+ // previous spill instruction that is killed in NextI.
+ if (isKilledReg(MONext, RegNext) && RegNext == Reg)
+ return true;
+ }
+ }
+ }
+ // Return false if we didn't find the spilled register.
+ return false;
+}
+
+Optional<SpillLoc>
+InstrRefBasedLDV::isRestoreInstruction(const MachineInstr &MI,
+ MachineFunction *MF, unsigned &Reg) {
+ if (!MI.hasOneMemOperand())
+ return None;
+
+ // FIXME: Handle folded restore instructions with more than one memory
+ // operand.
+ if (MI.getRestoreSize(TII)) {
+ Reg = MI.getOperand(0).getReg();
+ return extractSpillBaseRegAndOffset(MI);
+ }
+ return None;
+}
+
+bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) {
+ // XXX -- it's too difficult to implement VarLocBasedImpl's stack location
+ // limitations under the new model. Therefore, when comparing them, compare
+ // versions that don't attempt spills or restores at all.
+ if (EmulateOldLDV)
+ return false;
+
+ MachineFunction *MF = MI.getMF();
+ unsigned Reg;
+ Optional<SpillLoc> Loc;
+
+ LLVM_DEBUG(dbgs() << "Examining instruction: "; MI.dump(););
+
+ // First, if there are any DBG_VALUEs pointing at a spill slot that is
+ // written to, terminate that variable location. The value in memory
+ // will have changed. DbgEntityHistoryCalculator doesn't try to detect this.
+ if (isSpillInstruction(MI, MF)) {
+ Loc = extractSpillBaseRegAndOffset(MI);
+
+ if (TTracker) {
+ Optional<LocIdx> MLoc = MTracker->getSpillMLoc(*Loc);
+ if (MLoc)
+ TTracker->clobberMloc(*MLoc, MI.getIterator());
+ }
+ }
+
+ // Try to recognise spill and restore instructions that may transfer a value.
+ if (isLocationSpill(MI, MF, Reg)) {
+ Loc = extractSpillBaseRegAndOffset(MI);
+ auto ValueID = MTracker->readReg(Reg);
+
+ // If the location is empty, produce a phi, signifying it's the live-in value.
+ if (ValueID.getLoc() == 0)
+ ValueID = {CurBB, 0, MTracker->getRegMLoc(Reg)};
+
+ MTracker->setSpill(*Loc, ValueID);
+ auto OptSpillLocIdx = MTracker->getSpillMLoc(*Loc);
+ assert(OptSpillLocIdx && "Spill slot set but has no LocIdx?");
+ LocIdx SpillLocIdx = *OptSpillLocIdx;
+
+ // Tell TransferTracker about this spill, produce DBG_VALUEs for it.
+ if (TTracker)
+ TTracker->transferMlocs(MTracker->getRegMLoc(Reg), SpillLocIdx,
+ MI.getIterator());
+ } else {
+ if (!(Loc = isRestoreInstruction(MI, MF, Reg)))
+ return false;
+
+ // Is there a value to be restored?
+ auto OptValueID = MTracker->readSpill(*Loc);
+ if (OptValueID) {
+ ValueIDNum ValueID = *OptValueID;
+ LocIdx SpillLocIdx = *MTracker->getSpillMLoc(*Loc);
+ // XXX -- can we recover sub-registers of this value? Until we can, first
+ // overwrite all defs of the register being restored to.
+ for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
+ MTracker->defReg(*RAI, CurBB, CurInst);
+
+ // Now override the reg we're restoring to.
+ MTracker->setReg(Reg, ValueID);
+
+ // Report this restore to the transfer tracker too.
+ if (TTracker)
+ TTracker->transferMlocs(SpillLocIdx, MTracker->getRegMLoc(Reg),
+ MI.getIterator());
+ } else {
+ // There isn't anything in the location; not clear if this is a code path
+ // that still runs. Def this register anyway just in case.
+ for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
+ MTracker->defReg(*RAI, CurBB, CurInst);
+
+ // Force the spill slot to be tracked.
+ LocIdx L = MTracker->getOrTrackSpillLoc(*Loc);
+
+      // Set the restored value to be a machine phi number, signifying that
+      // it's whatever the spill's live-in value is in this block. It
+      // definitely has a LocIdx, due to the getOrTrackSpillLoc call above.
+ ValueIDNum ValueID = {CurBB, 0, L};
+ MTracker->setReg(Reg, ValueID);
+ MTracker->setSpill(*Loc, ValueID);
+ }
+ }
+ return true;
+}
+
+bool InstrRefBasedLDV::transferRegisterCopy(MachineInstr &MI) {
+ auto DestSrc = TII->isCopyInstr(MI);
+ if (!DestSrc)
+ return false;
+
+ const MachineOperand *DestRegOp = DestSrc->Destination;
+ const MachineOperand *SrcRegOp = DestSrc->Source;
+
+ auto isCalleeSavedReg = [&](unsigned Reg) {
+ for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
+ if (CalleeSavedRegs.test(*RAI))
+ return true;
+ return false;
+ };
+
+ Register SrcReg = SrcRegOp->getReg();
+ Register DestReg = DestRegOp->getReg();
+
+ // Ignore identity copies. Yep, these make it as far as LiveDebugValues.
+ if (SrcReg == DestReg)
+ return true;
+
+ // For emulating VarLocBasedImpl:
+  // We want to recognize instructions where the destination register is a
+  // callee-saved register. If a register that could be clobbered by a call
+  // were used instead, there would be a good chance of it being clobbered
+  // soon. The callee-saved register location is more likely to stay
+  // unclobbered for longer, even if it is killed.
+ //
+ // For InstrRefBasedImpl, we can track multiple locations per value, so
+ // ignore this condition.
+ if (EmulateOldLDV && !isCalleeSavedReg(DestReg))
+ return false;
+
+  // VarLocBasedImpl only followed killing copies.
+ if (EmulateOldLDV && !SrcRegOp->isKill())
+ return false;
+
+ // Copy MTracker info, including subregs if available.
+ InstrRefBasedLDV::performCopy(SrcReg, DestReg);
+
+ // Only produce a transfer of DBG_VALUE within a block where old LDV
+ // would have. We might make use of the additional value tracking in some
+ // other way, later.
+ if (TTracker && isCalleeSavedReg(DestReg) && SrcRegOp->isKill())
+ TTracker->transferMlocs(MTracker->getRegMLoc(SrcReg),
+ MTracker->getRegMLoc(DestReg), MI.getIterator());
+
+ // VarLocBasedImpl would quit tracking the old location after copying.
+ if (EmulateOldLDV && SrcReg != DestReg)
+ MTracker->defReg(SrcReg, CurBB, CurInst);
+
+ return true;
+}
+
+/// Accumulate a mapping between each DILocalVariable fragment and other
+/// fragments of that DILocalVariable which overlap. This reduces work during
+/// the data-flow stage from "Find any overlapping fragments" to "Check if the
+/// known-to-overlap fragments are present".
+/// \param MI A previously unprocessed DEBUG_VALUE instruction to analyze for
+/// fragment usage.
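+/// For example (with hypothetical fragments): after seeing a fragment
+/// covering bits [0, 32) and another covering bits [16, 48) of one variable,
+/// each is recorded as overlapping the other, so later stages can look the
+/// pair up instead of re-testing overlap.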
+void InstrRefBasedLDV::accumulateFragmentMap(MachineInstr &MI) {
+ DebugVariable MIVar(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt());
+ FragmentInfo ThisFragment = MIVar.getFragmentOrDefault();
+
+ // If this is the first sighting of this variable, then we are guaranteed
+ // there are currently no overlapping fragments either. Initialize the set
+ // of seen fragments, record no overlaps for the current one, and return.
+ auto SeenIt = SeenFragments.find(MIVar.getVariable());
+ if (SeenIt == SeenFragments.end()) {
+ SmallSet<FragmentInfo, 4> OneFragment;
+ OneFragment.insert(ThisFragment);
+ SeenFragments.insert({MIVar.getVariable(), OneFragment});
+
+ OverlapFragments.insert({{MIVar.getVariable(), ThisFragment}, {}});
+ return;
+ }
+
+ // If this particular Variable/Fragment pair already exists in the overlap
+ // map, it has already been accounted for.
+ auto IsInOLapMap =
+ OverlapFragments.insert({{MIVar.getVariable(), ThisFragment}, {}});
+ if (!IsInOLapMap.second)
+ return;
+
+ auto &ThisFragmentsOverlaps = IsInOLapMap.first->second;
+ auto &AllSeenFragments = SeenIt->second;
+
+ // Otherwise, examine all other seen fragments for this variable, with "this"
+ // fragment being a previously unseen fragment. Record any pair of
+ // overlapping fragments.
+ for (auto &ASeenFragment : AllSeenFragments) {
+ // Does this previously seen fragment overlap?
+ if (DIExpression::fragmentsOverlap(ThisFragment, ASeenFragment)) {
+ // Yes: Mark the current fragment as being overlapped.
+ ThisFragmentsOverlaps.push_back(ASeenFragment);
+ // Mark the previously seen fragment as being overlapped by the current
+ // one.
+ auto ASeenFragmentsOverlaps =
+ OverlapFragments.find({MIVar.getVariable(), ASeenFragment});
+ assert(ASeenFragmentsOverlaps != OverlapFragments.end() &&
+ "Previously seen var fragment has no vector of overlaps");
+ ASeenFragmentsOverlaps->second.push_back(ThisFragment);
+ }
+ }
+
+ AllSeenFragments.insert(ThisFragment);
+}
+
+void InstrRefBasedLDV::process(MachineInstr &MI) {
+  // Try to interpret an MI as a debug or transfer instruction. Only if it's
+  // none of these should we interpret its register defs as new value
+  // definitions.
+ if (transferDebugValue(MI))
+ return;
+ if (transferDebugInstrRef(MI))
+ return;
+ if (transferRegisterCopy(MI))
+ return;
+ if (transferSpillOrRestoreInst(MI))
+ return;
+ transferRegisterDef(MI);
+}
+
+void InstrRefBasedLDV::produceMLocTransferFunction(
+ MachineFunction &MF, SmallVectorImpl<MLocTransferMap> &MLocTransfer,
+ unsigned MaxNumBlocks) {
+  // Because we try to optimize around register mask operands by ignoring regs
+  // that aren't currently tracked, we set up something ugly for later: RegMask
+  // operands that are seen earlier than the first use of a register still need
+  // to clobber that register in the transfer function. But this information
+  // isn't actively recorded. Instead, we track each RegMask used in each block,
+  // and accumulate the clobbered-but-untracked registers of each block into
+  // the following bitvector. Later, if new values are tracked, we can add
+  // appropriate clobbers.
+ SmallVector<BitVector, 32> BlockMasks;
+ BlockMasks.resize(MaxNumBlocks);
+
+ // Reserve one bit per register for the masks described above.
+ unsigned BVWords = MachineOperand::getRegMaskSize(TRI->getNumRegs());
+ for (auto &BV : BlockMasks)
+ BV.resize(TRI->getNumRegs(), true);
+
+ // Step through all instructions and inhale the transfer function.
+ for (auto &MBB : MF) {
+ // Object fields that are read by trackers to know where we are in the
+ // function.
+ CurBB = MBB.getNumber();
+ CurInst = 1;
+
+ // Set all machine locations to a PHI value. For transfer function
+ // production only, this signifies the live-in value to the block.
+ MTracker->reset();
+ MTracker->setMPhis(CurBB);
+
+ // Step through each instruction in this block.
+ for (auto &MI : MBB) {
+ process(MI);
+ // Also accumulate fragment map.
+ if (MI.isDebugValue())
+ accumulateFragmentMap(MI);
+
+ // Create a map from the instruction number (if present) to the
+ // MachineInstr and its position.
+ if (uint64_t InstrNo = MI.peekDebugInstrNum()) {
+ auto InstrAndPos = std::make_pair(&MI, CurInst);
+ auto InsertResult =
+ DebugInstrNumToInstr.insert(std::make_pair(InstrNo, InstrAndPos));
+
+ // There should never be duplicate instruction numbers.
+ assert(InsertResult.second);
+ (void)InsertResult;
+ }
+
+ ++CurInst;
+ }
+
+ // Produce the transfer function, a map of machine location to new value. If
+ // any machine location has the live-in phi value from the start of the
+ // block, it's live-through and doesn't need recording in the transfer
+ // function.
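+    // Illustrative entry (hypothetical numbers): if instruction 4 of block 2
+    // defines a new value in location L that survives to the block end, the
+    // transfer function gains the entry { L -> ValueIDNum{2, 4, L} }.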
+ for (auto Location : MTracker->locations()) {
+ LocIdx Idx = Location.Idx;
+ ValueIDNum &P = Location.Value;
+ if (P.isPHI() && P.getLoc() == Idx.asU64())
+ continue;
+
+ // Insert-or-update.
+ auto &TransferMap = MLocTransfer[CurBB];
+ auto Result = TransferMap.insert(std::make_pair(Idx.asU64(), P));
+ if (!Result.second)
+ Result.first->second = P;
+ }
+
+    // Accumulate any bitmask operands into the clobbered reg mask for this
+    // block.
+ for (auto &P : MTracker->Masks) {
+ BlockMasks[CurBB].clearBitsNotInMask(P.first->getRegMask(), BVWords);
+ }
+ }
+
+  // Compute a bitvector of all the registers that are tracked across the
+  // function.
+ const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
+ Register SP = TLI->getStackPointerRegisterToSaveRestore();
+ BitVector UsedRegs(TRI->getNumRegs());
+ for (auto Location : MTracker->locations()) {
+ unsigned ID = MTracker->LocIdxToLocID[Location.Idx];
+ if (ID >= TRI->getNumRegs() || ID == SP)
+ continue;
+ UsedRegs.set(ID);
+ }
+
+ // Check that any regmask-clobber of a register that gets tracked, is not
+ // live-through in the transfer function. It needs to be clobbered at the
+ // very least.
+ for (unsigned int I = 0; I < MaxNumBlocks; ++I) {
+ BitVector &BV = BlockMasks[I];
+ BV.flip();
+ BV &= UsedRegs;
+    // This produces all the bits that we clobber, but also use. Check that
+    // they're all clobbered or at least set in the designated transfer
+    // element.
+ for (unsigned Bit : BV.set_bits()) {
+ unsigned ID = MTracker->getLocID(Bit, false);
+ LocIdx Idx = MTracker->LocIDToLocIdx[ID];
+ auto &TransferMap = MLocTransfer[I];
+
+ // Install a value representing the fact that this location is effectively
+ // written to in this block. As there's no reserved value, instead use
+ // a value number that is never generated. Pick the value number for the
+ // first instruction in the block, def'ing this location, which we know
+ // this block never used anyway.
+ ValueIDNum NotGeneratedNum = ValueIDNum(I, 1, Idx);
+ auto Result =
+ TransferMap.insert(std::make_pair(Idx.asU64(), NotGeneratedNum));
+ if (!Result.second) {
+ ValueIDNum &ValueID = Result.first->second;
+ if (ValueID.getBlock() == I && ValueID.isPHI())
+ // It was left as live-through. Set it to clobbered.
+ ValueID = NotGeneratedNum;
+ }
+ }
+ }
+}
+
+std::tuple<bool, bool>
+InstrRefBasedLDV::mlocJoin(MachineBasicBlock &MBB,
+ SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
+ ValueIDNum **OutLocs, ValueIDNum *InLocs) {
+ LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n");
+ bool Changed = false;
+ bool DowngradeOccurred = false;
+
+  // Collect predecessors that have been visited. Anything that hasn't been
+  // visited yet is a backedge on the first iteration, and the meet of its
+  // lattice value for all locations will be unaffected.
+ SmallVector<const MachineBasicBlock *, 8> BlockOrders;
+ for (auto Pred : MBB.predecessors()) {
+ if (Visited.count(Pred)) {
+ BlockOrders.push_back(Pred);
+ }
+ }
+
+ // Visit predecessors in RPOT order.
+ auto Cmp = [&](const MachineBasicBlock *A, const MachineBasicBlock *B) {
+ return BBToOrder.find(A)->second < BBToOrder.find(B)->second;
+ };
+ llvm::sort(BlockOrders, Cmp);
+
+ // Skip entry block.
+ if (BlockOrders.size() == 0)
+ return std::tuple<bool, bool>(false, false);
+
+ // Step through all machine locations, then look at each predecessor and
+ // detect disagreements.
+ unsigned ThisBlockRPO = BBToOrder.find(&MBB)->second;
+ for (auto Location : MTracker->locations()) {
+ LocIdx Idx = Location.Idx;
+    // Pick out the first predecessor's live-out value for this location. It's
+    // guaranteed not to be a backedge, as we order by RPO.
+ ValueIDNum BaseVal = OutLocs[BlockOrders[0]->getNumber()][Idx.asU64()];
+
+ // Some flags for whether there's a disagreement, and whether it's a
+ // disagreement with a backedge or not.
+ bool Disagree = false;
+ bool NonBackEdgeDisagree = false;
+
+ // Loop around everything that wasn't 'base'.
+ for (unsigned int I = 1; I < BlockOrders.size(); ++I) {
+ auto *MBB = BlockOrders[I];
+ if (BaseVal != OutLocs[MBB->getNumber()][Idx.asU64()]) {
+ // Live-out of a predecessor disagrees with the first predecessor.
+ Disagree = true;
+
+        // Test whether it's a disagreement in the backedges or not.
+ if (BBToOrder.find(MBB)->second < ThisBlockRPO) // might be self b/e
+ NonBackEdgeDisagree = true;
+ }
+ }
+
+ bool OverRide = false;
+ if (Disagree && !NonBackEdgeDisagree) {
+      // Only the backedges disagree. Consider demoting the live-in
+      // lattice value, as per the file level comment. The value we consider
+      // demoting to is the value that the non-backedge predecessors agree on.
+      // The order of values is that non-PHIs are \top, a PHI at this block is
+      // \bot, and PHIs between the two are ordered by their RPO number.
+ // If there's no agreement, or we've already demoted to this PHI value
+ // before, replace with a PHI value at this block.
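+      // (Illustrative: for a block with RPO number 3, the order from \top to
+      // \bot is: non-PHI def, PHI@RPO-0, PHI@RPO-1, PHI@RPO-2, PHI@RPO-3.)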
+
+ // Calculate order numbers: zero means normal def, nonzero means RPO
+ // number.
+ unsigned BaseBlockRPONum = BBNumToRPO[BaseVal.getBlock()] + 1;
+ if (!BaseVal.isPHI())
+ BaseBlockRPONum = 0;
+
+ ValueIDNum &InLocID = InLocs[Idx.asU64()];
+ unsigned InLocRPONum = BBNumToRPO[InLocID.getBlock()] + 1;
+ if (!InLocID.isPHI())
+ InLocRPONum = 0;
+
+ // Should we ignore the disagreeing backedges, and override with the
+ // value the other predecessors agree on (in "base")?
+ unsigned ThisBlockRPONum = BBNumToRPO[MBB.getNumber()] + 1;
+ if (BaseBlockRPONum > InLocRPONum && BaseBlockRPONum < ThisBlockRPONum) {
+ // Override.
+ OverRide = true;
+ DowngradeOccurred = true;
+ }
+ }
+ // else: if we disagree in the non-backedges, then this is definitely
+ // a control flow merge where different values merge. Make it a PHI.
+
+ // Generate a phi...
+ ValueIDNum PHI = {(uint64_t)MBB.getNumber(), 0, Idx};
+ ValueIDNum NewVal = (Disagree && !OverRide) ? PHI : BaseVal;
+ if (InLocs[Idx.asU64()] != NewVal) {
+ Changed |= true;
+ InLocs[Idx.asU64()] = NewVal;
+ }
+ }
+
+ // TODO: Reimplement NumInserted and NumRemoved.
+ return std::tuple<bool, bool>(Changed, DowngradeOccurred);
+}
+
+void InstrRefBasedLDV::mlocDataflow(
+ ValueIDNum **MInLocs, ValueIDNum **MOutLocs,
+ SmallVectorImpl<MLocTransferMap> &MLocTransfer) {
+ std::priority_queue<unsigned int, std::vector<unsigned int>,
+ std::greater<unsigned int>>
+ Worklist, Pending;
+
+ // We track what is on the current and pending worklist to avoid inserting
+ // the same thing twice. We could avoid this with a custom priority queue,
+ // but this is probably not worth it.
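+  // (With std::greater, the priority queue is a min-heap: top() yields the
+  // lowest RPO number, i.e. the earliest unexplored block in RPO order.)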
+ SmallPtrSet<MachineBasicBlock *, 16> OnPending, OnWorklist;
+
+ // Initialize worklist with every block to be visited.
+ for (unsigned int I = 0; I < BBToOrder.size(); ++I) {
+ Worklist.push(I);
+ OnWorklist.insert(OrderToBB[I]);
+ }
+
+ MTracker->reset();
+
+ // Set inlocs for entry block -- each as a PHI at the entry block. Represents
+ // the incoming value to the function.
+ MTracker->setMPhis(0);
+ for (auto Location : MTracker->locations())
+ MInLocs[0][Location.Idx.asU64()] = Location.Value;
+
+ SmallPtrSet<const MachineBasicBlock *, 16> Visited;
+ while (!Worklist.empty() || !Pending.empty()) {
+ // Vector for storing the evaluated block transfer function.
+ SmallVector<std::pair<LocIdx, ValueIDNum>, 32> ToRemap;
+
+ while (!Worklist.empty()) {
+ MachineBasicBlock *MBB = OrderToBB[Worklist.top()];
+ CurBB = MBB->getNumber();
+ Worklist.pop();
+
+ // Join the values in all predecessor blocks.
+ bool InLocsChanged, DowngradeOccurred;
+ std::tie(InLocsChanged, DowngradeOccurred) =
+ mlocJoin(*MBB, Visited, MOutLocs, MInLocs[CurBB]);
+ InLocsChanged |= Visited.insert(MBB).second;
+
+ // If a downgrade occurred, book us in for re-examination on the next
+ // iteration.
+ if (DowngradeOccurred && OnPending.insert(MBB).second)
+ Pending.push(BBToOrder[MBB]);
+
+      // Don't examine the transfer function if we've visited this block at
+      // least once and the inlocs haven't changed.
+ if (!InLocsChanged)
+ continue;
+
+ // Load the current set of live-ins into MLocTracker.
+ MTracker->loadFromArray(MInLocs[CurBB], CurBB);
+
+ // Each element of the transfer function can be a new def, or a read of
+ // a live-in value. Evaluate each element, and store to "ToRemap".
+ ToRemap.clear();
+ for (auto &P : MLocTransfer[CurBB]) {
+ if (P.second.getBlock() == CurBB && P.second.isPHI()) {
+ // This is a movement of whatever was live in. Read it.
+ ValueIDNum NewID = MTracker->getNumAtPos(P.second.getLoc());
+ ToRemap.push_back(std::make_pair(P.first, NewID));
+ } else {
+ // It's a def. Just set it.
+ assert(P.second.getBlock() == CurBB);
+ ToRemap.push_back(std::make_pair(P.first, P.second));
+ }
+ }
+
+ // Commit the transfer function changes into mloc tracker, which
+ // transforms the contents of the MLocTracker into the live-outs.
+ for (auto &P : ToRemap)
+ MTracker->setMLoc(P.first, P.second);
+
+ // Now copy out-locs from mloc tracker into out-loc vector, checking
+ // whether changes have occurred. These changes can have come from both
+ // the transfer function, and mlocJoin.
+ bool OLChanged = false;
+ for (auto Location : MTracker->locations()) {
+ OLChanged |= MOutLocs[CurBB][Location.Idx.asU64()] != Location.Value;
+ MOutLocs[CurBB][Location.Idx.asU64()] = Location.Value;
+ }
+
+ MTracker->reset();
+
+ // No need to examine successors again if out-locs didn't change.
+ if (!OLChanged)
+ continue;
+
+ // All successors should be visited: put any back-edges on the pending
+ // list for the next dataflow iteration, and any other successors to be
+ // visited this iteration, if they're not going to be already.
+ for (auto s : MBB->successors()) {
+ // Does branching to this successor represent a back-edge?
+ if (BBToOrder[s] > BBToOrder[MBB]) {
+ // No: visit it during this dataflow iteration.
+ if (OnWorklist.insert(s).second)
+ Worklist.push(BBToOrder[s]);
+ } else {
+ // Yes: visit it on the next iteration.
+ if (OnPending.insert(s).second)
+ Pending.push(BBToOrder[s]);
+ }
+ }
+ }
+
+ Worklist.swap(Pending);
+ std::swap(OnPending, OnWorklist);
+ OnPending.clear();
+    // At this point, Pending must be empty, since it was just swapped with
+    // the (now drained) worklist.
+ assert(Pending.empty() && "Pending should be empty");
+ }
+
+  // Once the live-ins no longer change in mlocJoin(), we've reached a
+  // fixed point.
+}
+
+bool InstrRefBasedLDV::vlocDowngradeLattice(
+ const MachineBasicBlock &MBB, const DbgValue &OldLiveInLocation,
+ const SmallVectorImpl<InValueT> &Values, unsigned CurBlockRPONum) {
+  // Ranking value preference: see the file level comment; the highest rank is
+  // a plain def, followed by PHI values in reverse post-order. Numerically,
+  // we assign all defs the rank '0', all PHIs their block's RPO number plus
+  // one, and consider the lowest value the highest ranked.
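+  // (Illustrative: a plain def ranks 0, a PHI in the block with RPO number 2
+  // ranks 3, and numerically lower means higher preference.)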
+ int OldLiveInRank = BBNumToRPO[OldLiveInLocation.ID.getBlock()] + 1;
+ if (!OldLiveInLocation.ID.isPHI())
+ OldLiveInRank = 0;
+
+ // Allow any unresolvable conflict to be over-ridden.
+ if (OldLiveInLocation.Kind == DbgValue::NoVal) {
+    // Although if it was an unresolvable conflict from _this_ block, then
+    // all other seeking of downgrades and PHIs must have failed beforehand.
+ if (OldLiveInLocation.BlockNo == (unsigned)MBB.getNumber())
+ return false;
+ OldLiveInRank = INT_MIN;
+ }
+
+ auto &InValue = *Values[0].second;
+
+ if (InValue.Kind == DbgValue::Const || InValue.Kind == DbgValue::NoVal)
+ return false;
+
+ unsigned ThisRPO = BBNumToRPO[InValue.ID.getBlock()];
+ int ThisRank = ThisRPO + 1;
+ if (!InValue.ID.isPHI())
+ ThisRank = 0;
+
+ // Too far down the lattice?
+ if (ThisRPO >= CurBlockRPONum)
+ return false;
+
+ // Higher in the lattice than what we've already explored?
+ if (ThisRank <= OldLiveInRank)
+ return false;
+
+ return true;
+}
+
+std::tuple<Optional<ValueIDNum>, bool> InstrRefBasedLDV::pickVPHILoc(
+ MachineBasicBlock &MBB, const DebugVariable &Var, const LiveIdxT &LiveOuts,
+ ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
+ const SmallVectorImpl<MachineBasicBlock *> &BlockOrders) {
+  // For each predecessor, collect the set of locations where its live-out
+  // value can be found.
+ SmallVector<SmallVector<LocIdx, 4>, 8> Locs;
+ unsigned NumLocs = MTracker->getNumLocs();
+ unsigned BackEdgesStart = 0;
+
+ for (auto p : BlockOrders) {
+ // Pick out where backedges start in the list of predecessors. Relies on
+ // BlockOrders being sorted by RPO.
+ if (BBToOrder[p] < BBToOrder[&MBB])
+ ++BackEdgesStart;
+
+ // For each predecessor, create a new set of locations.
+ Locs.resize(Locs.size() + 1);
+ unsigned ThisBBNum = p->getNumber();
+ auto LiveOutMap = LiveOuts.find(p);
+ if (LiveOutMap == LiveOuts.end())
+      // This predecessor isn't in scope; it must have no live-in/live-out
+      // locations.
+ continue;
+
+ auto It = LiveOutMap->second->find(Var);
+ if (It == LiveOutMap->second->end())
+      // There's no value recorded for this variable in this predecessor;
+      // leave an empty set of locations.
+ continue;
+
+ const DbgValue &OutVal = It->second;
+
+ if (OutVal.Kind == DbgValue::Const || OutVal.Kind == DbgValue::NoVal)
+ // Consts and no-values cannot have locations we can join on.
+ continue;
+
+ assert(OutVal.Kind == DbgValue::Proposed || OutVal.Kind == DbgValue::Def);
+ ValueIDNum ValToLookFor = OutVal.ID;
+
+ // Search the live-outs of the predecessor for the specified value.
+ for (unsigned int I = 0; I < NumLocs; ++I) {
+ if (MOutLocs[ThisBBNum][I] == ValToLookFor)
+ Locs.back().push_back(LocIdx(I));
+ }
+ }
+
+ // If there were no locations at all, return an empty result.
+ if (Locs.empty())
+ return std::tuple<Optional<ValueIDNum>, bool>(None, false);
+
+ // Lambda for seeking a common location within a range of location-sets.
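+  // (Illustrative: predecessor location-sets {L1, L3} and {L1, L2} intersect
+  // to {L1}, so L1 would be the common location chosen.)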
+ using LocsIt = SmallVector<SmallVector<LocIdx, 4>, 8>::iterator;
+ auto SeekLocation =
+ [&Locs](llvm::iterator_range<LocsIt> SearchRange) -> Optional<LocIdx> {
+ // Starting with the first set of locations, take the intersection with
+ // subsequent sets.
+ SmallVector<LocIdx, 4> base = Locs[0];
+ for (auto &S : SearchRange) {
+ SmallVector<LocIdx, 4> new_base;
+ std::set_intersection(base.begin(), base.end(), S.begin(), S.end(),
+ std::inserter(new_base, new_base.begin()));
+ base = new_base;
+ }
+ if (base.empty())
+ return None;
+
+ // We now have a set of LocIdxes that contain the right output value in
+ // each of the predecessors. Pick the lowest; if there's a register loc,
+ // that'll be it.
+ return *base.begin();
+ };
+
+ // Search for a common location for all predecessors. If we can't, then fall
+ // back to only finding a common location between non-backedge predecessors.
+ bool ValidForAllLocs = true;
+ auto TheLoc = SeekLocation(Locs);
+ if (!TheLoc) {
+ ValidForAllLocs = false;
+ TheLoc =
+ SeekLocation(make_range(Locs.begin(), Locs.begin() + BackEdgesStart));
+ }
+
+ if (!TheLoc)
+ return std::tuple<Optional<ValueIDNum>, bool>(None, false);
+
+ // Return a PHI-value-number for the found location.
+ LocIdx L = *TheLoc;
+ ValueIDNum PHIVal = {(unsigned)MBB.getNumber(), 0, L};
+ return std::tuple<Optional<ValueIDNum>, bool>(PHIVal, ValidForAllLocs);
+}
+
+std::tuple<bool, bool> InstrRefBasedLDV::vlocJoin(
+ MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs, LiveIdxT &VLOCInLocs,
+ SmallPtrSet<const MachineBasicBlock *, 16> *VLOCVisited, unsigned BBNum,
+ const SmallSet<DebugVariable, 4> &AllVars, ValueIDNum **MOutLocs,
+ ValueIDNum **MInLocs,
+ SmallPtrSet<const MachineBasicBlock *, 8> &InScopeBlocks,
+ SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore,
+ DenseMap<DebugVariable, DbgValue> &InLocsT) {
+ bool DowngradeOccurred = false;
+
+ // To emulate VarLocBasedImpl, process this block if it's not in scope but
+ // _does_ assign a variable value. No live-ins for this scope are transferred
+ // in though, so we can return immediately.
+ if (InScopeBlocks.count(&MBB) == 0 && !ArtificialBlocks.count(&MBB)) {
+ if (VLOCVisited)
+ return std::tuple<bool, bool>(true, false);
+ return std::tuple<bool, bool>(false, false);
+ }
+
+ LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n");
+ bool Changed = false;
+
+ // Find any live-ins computed in a prior iteration.
+ auto ILSIt = VLOCInLocs.find(&MBB);
+ assert(ILSIt != VLOCInLocs.end());
+ auto &ILS = *ILSIt->second;
+
+ // Order predecessors by RPOT order, for exploring them in that order.
+ SmallVector<MachineBasicBlock *, 8> BlockOrders;
+ for (auto p : MBB.predecessors())
+ BlockOrders.push_back(p);
+
+ auto Cmp = [&](MachineBasicBlock *A, MachineBasicBlock *B) {
+ return BBToOrder[A] < BBToOrder[B];
+ };
+
+ llvm::sort(BlockOrders, Cmp);
+
+ unsigned CurBlockRPONum = BBToOrder[&MBB];
+
+ // Force a re-visit to loop heads in the first dataflow iteration.
+ // FIXME: if we could "propose" Const values this wouldn't be needed,
+ // because they'd need to be confirmed before being emitted.
+ if (!BlockOrders.empty() &&
+ BBToOrder[BlockOrders[BlockOrders.size() - 1]] >= CurBlockRPONum &&
+ VLOCVisited)
+ DowngradeOccurred = true;
+
+ auto ConfirmValue = [&InLocsT](const DebugVariable &DV, DbgValue VR) {
+ auto Result = InLocsT.insert(std::make_pair(DV, VR));
+ (void)Result;
+ assert(Result.second);
+ };
+
+  auto ConfirmNoVal = [&ConfirmValue, &MBB](
+                          const DebugVariable &Var,
+                          const DbgValueProperties &Properties) {
+ DbgValue NoLocPHIVal(MBB.getNumber(), Properties, DbgValue::NoVal);
+
+ ConfirmValue(Var, NoLocPHIVal);
+ };
+
+ // Attempt to join the values for each variable.
+ for (auto &Var : AllVars) {
+ // Collect all the DbgValues for this variable.
+ SmallVector<InValueT, 8> Values;
+ bool Bail = false;
+ unsigned BackEdgesStart = 0;
+ for (auto p : BlockOrders) {
+ // If the predecessor isn't in scope / to be explored, we'll never be
+ // able to join any locations.
+ if (!BlocksToExplore.contains(p)) {
+ Bail = true;
+ break;
+ }
+
+ // Don't attempt to handle unvisited predecessors: they're implicitly
+ // "unknown"s in the lattice.
+ if (VLOCVisited && !VLOCVisited->count(p))
+ continue;
+
+    // If the predecessor's OutLocs entry is absent, there's not much we can do.
+ auto OL = VLOCOutLocs.find(p);
+ if (OL == VLOCOutLocs.end()) {
+ Bail = true;
+ break;
+ }
+
+ // No live-out value for this predecessor also means we can't produce
+ // a joined value.
+ auto VIt = OL->second->find(Var);
+ if (VIt == OL->second->end()) {
+ Bail = true;
+ break;
+ }
+
+ // Keep track of where back-edges begin in the Values vector. Relies on
+ // BlockOrders being sorted by RPO.
+ unsigned ThisBBRPONum = BBToOrder[p];
+ if (ThisBBRPONum < CurBlockRPONum)
+ ++BackEdgesStart;
+
+ Values.push_back(std::make_pair(p, &VIt->second));
+ }
+
+ // If there were no values, or one of the predecessors couldn't have a
+ // value, then give up immediately. It's not safe to produce a live-in
+ // value.
+ if (Bail || Values.size() == 0)
+ continue;
+
+ // Enumeration identifying the current state of the predecessors values.
+ enum {
+ Unset = 0,
+ Agreed, // All preds agree on the variable value.
+ PropDisagree, // All preds agree, but the value kind is Proposed in some.
+ BEDisagree, // Only back-edges disagree on variable value.
+      PHINeeded, // Non-back-edge predecessors have conflicting values.
+ NoSolution // Conflicting Value metadata makes solution impossible.
+ } OurState = Unset;
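+    // (Illustrative: all preds yielding value V1 gives Agreed; only a
+    // backedge yielding a different V2 gives BEDisagree; two non-backedge
+    // preds yielding V1 and V2 gives PHINeeded.)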
+
+ // All (non-entry) blocks have at least one non-backedge predecessor.
+ // Pick the variable value from the first of these, to compare against
+ // all others.
+ const DbgValue &FirstVal = *Values[0].second;
+ const ValueIDNum &FirstID = FirstVal.ID;
+
+ // Scan for variable values that can't be resolved: if they have different
+ // DIExpressions, different indirectness, or are mixed constants /
+ // non-constants.
+ for (auto &V : Values) {
+ if (V.second->Properties != FirstVal.Properties)
+ OurState = NoSolution;
+ if (V.second->Kind == DbgValue::Const && FirstVal.Kind != DbgValue::Const)
+ OurState = NoSolution;
+ }
+
+ // Flags diagnosing _how_ the values disagree.
+ bool NonBackEdgeDisagree = false;
+ bool DisagreeOnPHINess = false;
+ bool IDDisagree = false;
+ bool Disagree = false;
+ if (OurState == Unset) {
+ for (auto &V : Values) {
+ if (*V.second == FirstVal)
+ continue; // No disagreement.
+
+ Disagree = true;
+
+        // Flag whether the value number actually disagrees.
+ if (V.second->ID != FirstID)
+ IDDisagree = true;
+
+ // Distinguish whether disagreement happens in backedges or not.
+ // Relies on Values (and BlockOrders) being sorted by RPO.
+ unsigned ThisBBRPONum = BBToOrder[V.first];
+ if (ThisBBRPONum < CurBlockRPONum)
+ NonBackEdgeDisagree = true;
+
+ // Is there a difference in whether the value is definite or only
+ // proposed?
+ if (V.second->Kind != FirstVal.Kind &&
+ (V.second->Kind == DbgValue::Proposed ||
+ V.second->Kind == DbgValue::Def) &&
+ (FirstVal.Kind == DbgValue::Proposed ||
+ FirstVal.Kind == DbgValue::Def))
+ DisagreeOnPHINess = true;
+ }
+
+      // Collect those flags together and determine an overall state
+      // expressing to what extent the predecessors agree on a live-in value.
+ if (!Disagree)
+ OurState = Agreed;
+ else if (!IDDisagree && DisagreeOnPHINess)
+ OurState = PropDisagree;
+ else if (!NonBackEdgeDisagree)
+ OurState = BEDisagree;
+ else
+ OurState = PHINeeded;
+ }
+
+    // An extra indicator: if we only disagree on whether the value is a Def
+    // or Proposed, also flag whether that disagreement happens in backedges
+    // only.
+ bool PropOnlyInBEs = Disagree && !IDDisagree && DisagreeOnPHINess &&
+ !NonBackEdgeDisagree && FirstVal.Kind == DbgValue::Def;
+
+ const auto &Properties = FirstVal.Properties;
+
+ auto OldLiveInIt = ILS.find(Var);
+ const DbgValue *OldLiveInLocation =
+ (OldLiveInIt != ILS.end()) ? &OldLiveInIt->second : nullptr;
+
+ bool OverRide = false;
+ if (OurState == BEDisagree && OldLiveInLocation) {
+ // Only backedges disagree: we can consider downgrading. If there was a
+ // previous live-in value, use it to work out whether the current
+ // incoming value represents a lattice downgrade or not.
+ OverRide =
+ vlocDowngradeLattice(MBB, *OldLiveInLocation, Values, CurBlockRPONum);
+ }
+
+ // Use the current state of predecessor agreement and other flags to work
+ // out what to do next. Possibilities include:
+ // * Accept a value all predecessors agree on, or accept one that
+ // represents a step down the exploration lattice,
+ // * Use a PHI value number, if one can be found,
+ // * Propose a PHI value number, and see if it gets confirmed later,
+ // * Emit a 'NoVal' value, indicating we couldn't resolve anything.
+ if (OurState == Agreed) {
+ // Easiest solution: all predecessors agree on the variable value.
+ ConfirmValue(Var, FirstVal);
+ } else if (OurState == BEDisagree && OverRide) {
+ // Only backedges disagree, and the other predecessors have produced
+ // a new live-in value further down the exploration lattice.
+ DowngradeOccurred = true;
+ ConfirmValue(Var, FirstVal);
+ } else if (OurState == PropDisagree) {
+ // Predecessors agree on value, but some say it's only a proposed value.
+ // Propagate it as proposed: unless it was proposed in this block, in
+ // which case we're able to confirm the value.
+ if (FirstID.getBlock() == (uint64_t)MBB.getNumber() && FirstID.isPHI()) {
+ ConfirmValue(Var, DbgValue(FirstID, Properties, DbgValue::Def));
+ } else if (PropOnlyInBEs) {
+ // If only backedges disagree, a higher (in RPO) block confirmed this
+ // location, and we need to propagate it into this loop.
+ ConfirmValue(Var, DbgValue(FirstID, Properties, DbgValue::Def));
+ } else {
+ // Otherwise; a Def meeting a Proposed is still a Proposed.
+ ConfirmValue(Var, DbgValue(FirstID, Properties, DbgValue::Proposed));
+ }
+ } else if ((OurState == PHINeeded || OurState == BEDisagree)) {
+ // Predecessors disagree and can't be downgraded: this can only be
+ // solved with a PHI. Use pickVPHILoc to go look for one.
+ Optional<ValueIDNum> VPHI;
+ bool AllEdgesVPHI = false;
+ std::tie(VPHI, AllEdgesVPHI) =
+ pickVPHILoc(MBB, Var, VLOCOutLocs, MOutLocs, MInLocs, BlockOrders);
+
+ if (VPHI && AllEdgesVPHI) {
+ // There's a PHI value that's valid for all predecessors -- we can use
+ // it. If any of the non-backedge predecessors have proposed values
+ // though, this PHI is also only proposed, until the predecessors are
+ // confirmed.
+ DbgValue::KindT K = DbgValue::Def;
+ for (unsigned int I = 0; I < BackEdgesStart; ++I)
+ if (Values[I].second->Kind == DbgValue::Proposed)
+ K = DbgValue::Proposed;
+
+ ConfirmValue(Var, DbgValue(*VPHI, Properties, K));
+ } else if (VPHI) {
+ // There's a PHI value, but it's only legal for backedges. Leave this
+ // as a proposed PHI value: it might come back on the backedges,
+ // and allow us to confirm it in the future.
+ DbgValue NoBEValue = DbgValue(*VPHI, Properties, DbgValue::Proposed);
+ ConfirmValue(Var, NoBEValue);
+ } else {
+ ConfirmNoVal(Var, Properties);
+ }
+ } else {
+ // Otherwise: we don't know. Emit a "phi but no real loc" phi.
+ ConfirmNoVal(Var, Properties);
+ }
+ }
+
+ // Store newly calculated in-locs into VLOCInLocs, if they've changed.
+ Changed = ILS != InLocsT;
+ if (Changed)
+ ILS = InLocsT;
+
+ return std::tuple<bool, bool>(Changed, DowngradeOccurred);
+}
+
+void InstrRefBasedLDV::vlocDataflow(
+ const LexicalScope *Scope, const DILocation *DILoc,
+ const SmallSet<DebugVariable, 4> &VarsWeCareAbout,
+ SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks, LiveInsT &Output,
+ ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
+ SmallVectorImpl<VLocTracker> &AllTheVLocs) {
+  // This method is much like mlocDataflow, but focuses on a single
+  // LexicalScope at a time. Pick out a set of blocks and variables that are
+  // to have their value assignments solved, then run our dataflow algorithm
+  // until a fixed point is reached.
+ std::priority_queue<unsigned int, std::vector<unsigned int>,
+ std::greater<unsigned int>>
+ Worklist, Pending;
+ SmallPtrSet<MachineBasicBlock *, 16> OnWorklist, OnPending;
+
+ // The set of blocks we'll be examining.
+ SmallPtrSet<const MachineBasicBlock *, 8> BlocksToExplore;
+
+ // The order in which to examine them (RPO).
+ SmallVector<MachineBasicBlock *, 8> BlockOrders;
+
+ // RPO ordering function.
+ auto Cmp = [&](MachineBasicBlock *A, MachineBasicBlock *B) {
+ return BBToOrder[A] < BBToOrder[B];
+ };
+
+ LS.getMachineBasicBlocks(DILoc, BlocksToExplore);
+
+  // A separate container to distinguish "blocks we're exploring" versus
+  // "blocks that are potentially in scope". See the comment at the start of
+  // vlocJoin.
+ SmallPtrSet<const MachineBasicBlock *, 8> InScopeBlocks = BlocksToExplore;
+
+ // Old LiveDebugValues tracks variable locations that come out of blocks
+ // not in scope, where DBG_VALUEs occur. This is something we could
+  // legitimately ignore, but let's allow it for now.
+ if (EmulateOldLDV)
+ BlocksToExplore.insert(AssignBlocks.begin(), AssignBlocks.end());
+
+ // We also need to propagate variable values through any artificial blocks
+ // that immediately follow blocks in scope.
+ DenseSet<const MachineBasicBlock *> ToAdd;
+
+  // Helper lambda: for a given block in scope, perform a depth-first search
+  // of all the artificial successors, adding them to the ToAdd collection.
+ auto AccumulateArtificialBlocks =
+ [this, &ToAdd, &BlocksToExplore,
+ &InScopeBlocks](const MachineBasicBlock *MBB) {
+ // Depth-first-search state: each node is a block and which successor
+ // we're currently exploring.
+ SmallVector<std::pair<const MachineBasicBlock *,
+ MachineBasicBlock::const_succ_iterator>,
+ 8>
+ DFS;
+
+ // Find any artificial successors not already tracked.
+ for (auto *succ : MBB->successors()) {
+ if (BlocksToExplore.count(succ) || InScopeBlocks.count(succ))
+ continue;
+ if (!ArtificialBlocks.count(succ))
+ continue;
+ DFS.push_back(std::make_pair(succ, succ->succ_begin()));
+ ToAdd.insert(succ);
+ }
+
+ // Search all those blocks, depth first.
+ while (!DFS.empty()) {
+ const MachineBasicBlock *CurBB = DFS.back().first;
+ MachineBasicBlock::const_succ_iterator &CurSucc = DFS.back().second;
+          // Walk back if we've explored this block's successors to the end.
+ if (CurSucc == CurBB->succ_end()) {
+ DFS.pop_back();
+ continue;
+ }
+
+ // If the current successor is artificial and unexplored, descend into
+ // it.
+ if (!ToAdd.count(*CurSucc) && ArtificialBlocks.count(*CurSucc)) {
+ DFS.push_back(std::make_pair(*CurSucc, (*CurSucc)->succ_begin()));
+ ToAdd.insert(*CurSucc);
+ continue;
+ }
+
+ ++CurSucc;
+ }
+ };
+
+ // Search in-scope blocks and those containing a DBG_VALUE from this scope
+ // for artificial successors.
+ for (auto *MBB : BlocksToExplore)
+ AccumulateArtificialBlocks(MBB);
+ for (auto *MBB : InScopeBlocks)
+ AccumulateArtificialBlocks(MBB);
+
+ BlocksToExplore.insert(ToAdd.begin(), ToAdd.end());
+ InScopeBlocks.insert(ToAdd.begin(), ToAdd.end());
+
+ // Single block scope: not interesting! No propagation at all. Note that
+ // this could probably go above ArtificialBlocks without damage, but
+ // that then produces output differences from original-live-debug-values,
+ // which propagates from a single block into many artificial ones.
+ if (BlocksToExplore.size() == 1)
+ return;
+
+  // Pick out the relevant blocks, and sort them into RPO order.
+ for (auto *MBB : BlocksToExplore)
+ BlockOrders.push_back(const_cast<MachineBasicBlock *>(MBB));
+
+ llvm::sort(BlockOrders, Cmp);
+ unsigned NumBlocks = BlockOrders.size();
+
+  // Allocate some vectors for storing the live-ins and live-outs. Large.
+ SmallVector<DenseMap<DebugVariable, DbgValue>, 32> LiveIns, LiveOuts;
+ LiveIns.resize(NumBlocks);
+ LiveOuts.resize(NumBlocks);
+
+ // Produce by-MBB indexes of live-in/live-outs, to ease lookup within
+ // vlocJoin.
+ LiveIdxT LiveOutIdx, LiveInIdx;
+ LiveOutIdx.reserve(NumBlocks);
+ LiveInIdx.reserve(NumBlocks);
+ for (unsigned I = 0; I < NumBlocks; ++I) {
+ LiveOutIdx[BlockOrders[I]] = &LiveOuts[I];
+ LiveInIdx[BlockOrders[I]] = &LiveIns[I];
+ }
+
+ for (auto *MBB : BlockOrders) {
+ Worklist.push(BBToOrder[MBB]);
+ OnWorklist.insert(MBB);
+ }
+
+ // Iterate over all the blocks we selected, propagating variable values.
+ bool FirstTrip = true;
+ SmallPtrSet<const MachineBasicBlock *, 16> VLOCVisited;
+ while (!Worklist.empty() || !Pending.empty()) {
+ while (!Worklist.empty()) {
+ auto *MBB = OrderToBB[Worklist.top()];
+ CurBB = MBB->getNumber();
+ Worklist.pop();
+
+ DenseMap<DebugVariable, DbgValue> JoinedInLocs;
+
+ // Join values from predecessors. Updates LiveInIdx, and writes output
+ // into JoinedInLocs.
+ bool InLocsChanged, DowngradeOccurred;
+ std::tie(InLocsChanged, DowngradeOccurred) = vlocJoin(
+ *MBB, LiveOutIdx, LiveInIdx, (FirstTrip) ? &VLOCVisited : nullptr,
+ CurBB, VarsWeCareAbout, MOutLocs, MInLocs, InScopeBlocks,
+ BlocksToExplore, JoinedInLocs);
+
+ bool FirstVisit = VLOCVisited.insert(MBB).second;
+
+ // Always explore transfer function if inlocs changed, or if we've not
+ // visited this block before.
+ InLocsChanged |= FirstVisit;
+
+ // If a downgrade occurred, book us in for re-examination on the next
+ // iteration.
+ if (DowngradeOccurred && OnPending.insert(MBB).second)
+ Pending.push(BBToOrder[MBB]);
+
+ if (!InLocsChanged)
+ continue;
+
+ // Do transfer function.
+ auto &VTracker = AllTheVLocs[MBB->getNumber()];
+ for (auto &Transfer : VTracker.Vars) {
+        // Is this a var we're handling in this scope?
+ if (VarsWeCareAbout.count(Transfer.first)) {
+ // Erase on empty transfer (DBG_VALUE $noreg).
+ if (Transfer.second.Kind == DbgValue::Undef) {
+ JoinedInLocs.erase(Transfer.first);
+ } else {
+ // Insert new variable value; or overwrite.
+ auto NewValuePair = std::make_pair(Transfer.first, Transfer.second);
+ auto Result = JoinedInLocs.insert(NewValuePair);
+ if (!Result.second)
+ Result.first->second = Transfer.second;
+ }
+ }
+ }
+
+ // Did the live-out locations change?
+ bool OLChanged = JoinedInLocs != *LiveOutIdx[MBB];
+
+ // If they haven't changed, there's no need to explore further.
+ if (!OLChanged)
+ continue;
+
+ // Commit to the live-out record.
+ *LiveOutIdx[MBB] = JoinedInLocs;
+
+ // We should visit all successors. Ensure we'll visit any non-backedge
+ // successors during this dataflow iteration; book backedge successors
+ // to be visited next time around.
+ for (auto s : MBB->successors()) {
+ // Ignore out of scope / not-to-be-explored successors.
+ if (LiveInIdx.find(s) == LiveInIdx.end())
+ continue;
+
+ if (BBToOrder[s] > BBToOrder[MBB]) {
+ if (OnWorklist.insert(s).second)
+ Worklist.push(BBToOrder[s]);
+ } else if (OnPending.insert(s).second && (FirstTrip || OLChanged)) {
+ Pending.push(BBToOrder[s]);
+ }
+ }
+ }
+ Worklist.swap(Pending);
+ std::swap(OnWorklist, OnPending);
+ OnPending.clear();
+ assert(Pending.empty());
+ FirstTrip = false;
+ }
+
+ // Dataflow done. Now what? Save live-ins. Ignore any that are still marked
+ // as being variable-PHIs, because those did not have their machine-PHI
+ // value confirmed. Such variable values are places that could have been
+ // PHIs, but are not.
+ for (auto *MBB : BlockOrders) {
+ auto &VarMap = *LiveInIdx[MBB];
+ for (auto &P : VarMap) {
+ if (P.second.Kind == DbgValue::Proposed ||
+ P.second.Kind == DbgValue::NoVal)
+ continue;
+ Output[MBB->getNumber()].push_back(P);
+ }
+ }
+
+ BlockOrders.clear();
+ BlocksToExplore.clear();
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void InstrRefBasedLDV::dump_mloc_transfer(
+ const MLocTransferMap &mloc_transfer) const {
+ for (auto &P : mloc_transfer) {
+    std::string LocName = MTracker->LocIdxToName(P.first);
+    std::string ValueName = MTracker->IDAsString(P.second);
+    dbgs() << "Loc " << LocName << " --> " << ValueName << "\n";
+ }
+}
+#endif
+
+void InstrRefBasedLDV::emitLocations(
+ MachineFunction &MF, LiveInsT SavedLiveIns, ValueIDNum **MInLocs,
+ DenseMap<DebugVariable, unsigned> &AllVarsNumbering) {
+ TTracker = new TransferTracker(TII, MTracker, MF, *TRI, CalleeSavedRegs);
+ unsigned NumLocs = MTracker->getNumLocs();
+
+ // For each block, load in the machine value locations and variable value
+ // live-ins, then step through each instruction in the block. New DBG_VALUEs
+ // to be inserted will be created along the way.
+ for (MachineBasicBlock &MBB : MF) {
+ unsigned bbnum = MBB.getNumber();
+ MTracker->reset();
+ MTracker->loadFromArray(MInLocs[bbnum], bbnum);
+ TTracker->loadInlocs(MBB, MInLocs[bbnum], SavedLiveIns[MBB.getNumber()],
+ NumLocs);
+
+ CurBB = bbnum;
+ CurInst = 1;
+ for (auto &MI : MBB) {
+ process(MI);
+ TTracker->checkInstForNewValues(CurInst, MI.getIterator());
+ ++CurInst;
+ }
+ }
+
+  // We have to insert DBG_VALUEs in a consistent order, otherwise they appear
+  // in DWARF in different orders. Use the order that they appear when walking
+  // through each block / each instruction, stored in AllVarsNumbering.
+ auto OrderDbgValues = [&](const MachineInstr *A,
+ const MachineInstr *B) -> bool {
+ DebugVariable VarA(A->getDebugVariable(), A->getDebugExpression(),
+ A->getDebugLoc()->getInlinedAt());
+ DebugVariable VarB(B->getDebugVariable(), B->getDebugExpression(),
+ B->getDebugLoc()->getInlinedAt());
+ return AllVarsNumbering.find(VarA)->second <
+ AllVarsNumbering.find(VarB)->second;
+ };
+
+ // Go through all the transfers recorded in the TransferTracker -- this is
+ // both the live-ins to a block, and any movements of values that happen
+ // in the middle.
+ for (auto &P : TTracker->Transfers) {
+ // Sort them according to appearance order.
+ llvm::sort(P.Insts, OrderDbgValues);
+ // Insert either before or after the designated point...
+ if (P.MBB) {
+ MachineBasicBlock &MBB = *P.MBB;
+ for (auto *MI : P.Insts) {
+ MBB.insert(P.Pos, MI);
+ }
+ } else {
+ MachineBasicBlock &MBB = *P.Pos->getParent();
+ for (auto *MI : P.Insts) {
+ MBB.insertAfter(P.Pos, MI);
+ }
+ }
+ }
+}
+
+void InstrRefBasedLDV::initialSetup(MachineFunction &MF) {
+ // Build some useful data structures.
+ auto hasNonArtificialLocation = [](const MachineInstr &MI) -> bool {
+ if (const DebugLoc &DL = MI.getDebugLoc())
+ return DL.getLine() != 0;
+ return false;
+ };
+ // Collect a set of all the artificial blocks.
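+  // (That is, blocks whose instructions all carry either no DebugLoc or the
+  // artificial line-0 location.)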
+ for (auto &MBB : MF)
+ if (none_of(MBB.instrs(), hasNonArtificialLocation))
+ ArtificialBlocks.insert(&MBB);
+
+ // Compute mappings of block <=> RPO order.
+ ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
+ unsigned int RPONumber = 0;
+ for (auto RI = RPOT.begin(), RE = RPOT.end(); RI != RE; ++RI) {
+ OrderToBB[RPONumber] = *RI;
+ BBToOrder[*RI] = RPONumber;
+ BBNumToRPO[(*RI)->getNumber()] = RPONumber;
+ ++RPONumber;
+ }
+}
+
+/// Calculate the liveness information for the given machine function and
+/// extend ranges across basic blocks.
+bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
+ TargetPassConfig *TPC) {
+ // No subprogram means this function contains no debuginfo.
+ if (!MF.getFunction().getSubprogram())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "\nDebug Range Extension\n");
+ this->TPC = TPC;
+
+ TRI = MF.getSubtarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ TFI = MF.getSubtarget().getFrameLowering();
+ TFI->getCalleeSaves(MF, CalleeSavedRegs);
+ LS.initialize(MF);
+
+ MTracker =
+ new MLocTracker(MF, *TII, *TRI, *MF.getSubtarget().getTargetLowering());
+ VTracker = nullptr;
+ TTracker = nullptr;
+
+ SmallVector<MLocTransferMap, 32> MLocTransfer;
+ SmallVector<VLocTracker, 8> vlocs;
+ LiveInsT SavedLiveIns;
+
+ int MaxNumBlocks = -1;
+ for (auto &MBB : MF)
+ MaxNumBlocks = std::max(MBB.getNumber(), MaxNumBlocks);
+ assert(MaxNumBlocks >= 0);
+ ++MaxNumBlocks;
+
+ MLocTransfer.resize(MaxNumBlocks);
+ vlocs.resize(MaxNumBlocks);
+ SavedLiveIns.resize(MaxNumBlocks);
+
+ initialSetup(MF);
+
+ produceMLocTransferFunction(MF, MLocTransfer, MaxNumBlocks);
+
+  // Allocate and initialize two array-of-arrays for the live-in and live-out
+  // machine values. The outer dimension is the block number, while the inner
+  // dimension is a LocIdx from MLocTracker.
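+  // (So MInLocs[bb][l] is the machine value live into block bb at location
+  // index l.)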
+ ValueIDNum **MOutLocs = new ValueIDNum *[MaxNumBlocks];
+ ValueIDNum **MInLocs = new ValueIDNum *[MaxNumBlocks];
+ unsigned NumLocs = MTracker->getNumLocs();
+ for (int i = 0; i < MaxNumBlocks; ++i) {
+ MOutLocs[i] = new ValueIDNum[NumLocs];
+ MInLocs[i] = new ValueIDNum[NumLocs];
+ }
+
+ // Solve the machine value dataflow problem using the MLocTransfer function,
+ // storing the computed live-ins / live-outs into the array-of-arrays. We use
+ // both live-ins and live-outs for decision making in the variable value
+ // dataflow problem.
+ mlocDataflow(MInLocs, MOutLocs, MLocTransfer);
+
+ // Walk back through each block / instruction, collecting DBG_VALUE
+ // instructions and recording what machine value their operands refer to.
+ for (auto &OrderPair : OrderToBB) {
+ MachineBasicBlock &MBB = *OrderPair.second;
+ CurBB = MBB.getNumber();
+ VTracker = &vlocs[CurBB];
+ VTracker->MBB = &MBB;
+ MTracker->loadFromArray(MInLocs[CurBB], CurBB);
+ CurInst = 1;
+ for (auto &MI : MBB) {
+ process(MI);
+ ++CurInst;
+ }
+ MTracker->reset();
+ }
+
+ // Number all variables in the order that they appear, to be used as a stable
+ // insertion order later.
+ DenseMap<DebugVariable, unsigned> AllVarsNumbering;
+
+ // Map from one LexicalScope to all the variables in that scope.
+ DenseMap<const LexicalScope *, SmallSet<DebugVariable, 4>> ScopeToVars;
+
+  // Map from one lexical scope to all blocks in that scope.
+ DenseMap<const LexicalScope *, SmallPtrSet<MachineBasicBlock *, 4>>
+ ScopeToBlocks;
+
+ // Store a DILocation that describes a scope.
+ DenseMap<const LexicalScope *, const DILocation *> ScopeToDILocation;
+
+  // To mirror old LiveDebugValues, enumerate variables in RPOT order.
+  // Otherwise the order is unimportant; it just has to be stable.
+ for (unsigned int I = 0; I < OrderToBB.size(); ++I) {
+ auto *MBB = OrderToBB[I];
+ auto *VTracker = &vlocs[MBB->getNumber()];
+ // Collect each variable with a DBG_VALUE in this block.
+ for (auto &idx : VTracker->Vars) {
+ const auto &Var = idx.first;
+ const DILocation *ScopeLoc = VTracker->Scopes[Var];
+ assert(ScopeLoc != nullptr);
+ auto *Scope = LS.findLexicalScope(ScopeLoc);
+
+ // No insts in scope -> shouldn't have been recorded.
+ assert(Scope != nullptr);
+
+ AllVarsNumbering.insert(std::make_pair(Var, AllVarsNumbering.size()));
+ ScopeToVars[Scope].insert(Var);
+ ScopeToBlocks[Scope].insert(VTracker->MBB);
+ ScopeToDILocation[Scope] = ScopeLoc;
+ }
+ }
+
+ // OK. Iterate over scopes: there might be something to be said for
+ // ordering them by size/locality, but that's for the future. For each scope,
+ // solve the variable value problem, producing a map of variables to values
+ // in SavedLiveIns.
+ for (auto &P : ScopeToVars) {
+ vlocDataflow(P.first, ScopeToDILocation[P.first], P.second,
+ ScopeToBlocks[P.first], SavedLiveIns, MOutLocs, MInLocs,
+ vlocs);
+ }
+
+ // Using the computed value locations and variable values for each block,
+ // create the DBG_VALUE instructions representing the extended variable
+ // locations.
+ emitLocations(MF, SavedLiveIns, MInLocs, AllVarsNumbering);
+
+ for (int Idx = 0; Idx < MaxNumBlocks; ++Idx) {
+ delete[] MOutLocs[Idx];
+ delete[] MInLocs[Idx];
+ }
+ delete[] MOutLocs;
+ delete[] MInLocs;
+
+ // Did we actually make any changes? If we created any DBG_VALUEs, then yes.
+ bool Changed = TTracker->Transfers.size() != 0;
+
+ delete MTracker;
+ delete TTracker;
+ MTracker = nullptr;
+ VTracker = nullptr;
+ TTracker = nullptr;
+
+ ArtificialBlocks.clear();
+ OrderToBB.clear();
+ BBToOrder.clear();
+ BBNumToRPO.clear();
+ DebugInstrNumToInstr.clear();
+
+ return Changed;
+}
+
+LDVImpl *llvm::makeInstrRefBasedLiveDebugValues() {
+ return new InstrRefBasedLDV();
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
new file mode 100644
index 000000000000..770c46ec8436
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
@@ -0,0 +1,97 @@
+//===- LiveDebugValues.cpp - Tracking Debug Value MIs ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LiveDebugValues.h"
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetMachine.h"
+
+/// \file LiveDebugValues.cpp
+///
+/// The LiveDebugValues pass extends the range of variable locations
+/// (specified by DBG_VALUE instructions) from single blocks to successors
+/// and any other code locations where the variable location is valid.
+/// There are currently two implementations: the "VarLoc" implementation
+/// explicitly tracks the location of a variable, while the "InstrRef"
+/// implementation tracks the values defined by instructions through locations.
+///
+/// This file implements neither; it merely registers the pass, and allows
+/// the user to pick which implementation will be used to propagate variable
+/// locations.
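+///
+/// Which implementation runs is selected in runOnMachineFunction below, via
+/// TargetOptions::ValueTrackingVariableLocations.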
+
+#define DEBUG_TYPE "livedebugvalues"
+
+using namespace llvm;
+
+/// Generic LiveDebugValues pass. Calls through to VarLocBasedLDV or
+/// InstrRefBasedLDV to perform location propagation, via the LDVImpl
+/// base class.
+class LiveDebugValues : public MachineFunctionPass {
+public:
+ static char ID;
+
+ LiveDebugValues();
+ ~LiveDebugValues() {
+ if (TheImpl)
+ delete TheImpl;
+ }
+
+ /// Calculate the liveness information for the given machine function.
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ LDVImpl *TheImpl;
+ TargetPassConfig *TPC;
+};
+
+char LiveDebugValues::ID = 0;
+
+char &llvm::LiveDebugValuesID = LiveDebugValues::ID;
+
+INITIALIZE_PASS(LiveDebugValues, DEBUG_TYPE, "Live DEBUG_VALUE analysis", false,
+ false)
+
+/// Default construct and initialize the pass.
+LiveDebugValues::LiveDebugValues() : MachineFunctionPass(ID) {
+ initializeLiveDebugValuesPass(*PassRegistry::getPassRegistry());
+ TheImpl = nullptr;
+}
+
+bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
+ if (!TheImpl) {
+ TPC = getAnalysisIfAvailable<TargetPassConfig>();
+
+ bool InstrRefBased = false;
+ if (TPC) {
+ auto &TM = TPC->getTM<TargetMachine>();
+ InstrRefBased = TM.Options.ValueTrackingVariableLocations;
+ }
+
+ if (InstrRefBased)
+ TheImpl = llvm::makeInstrRefBasedLiveDebugValues();
+ else
+ TheImpl = llvm::makeVarLocBasedLiveDebugValues();
+ }
+
+ return TheImpl->ExtendRanges(MF, TPC);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
new file mode 100644
index 000000000000..6b05bc68d74d
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
@@ -0,0 +1,32 @@
+//===- LiveDebugValues.h - Tracking Debug Value MIs -----------*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+
+namespace llvm {
+
+// Inline namespace for types / symbols shared between different
+// LiveDebugValues implementations.
+inline namespace SharedLiveDebugValues {
+
+// Expose a base class for LiveDebugValues interfaces to inherit from. This
+// allows the generic LiveDebugValues pass to call into the chosen
+// implementation.
+class LDVImpl {
+public:
+ virtual bool ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) = 0;
+ virtual ~LDVImpl() {}
+};
+
+} // namespace SharedLiveDebugValues
+
+// Factory functions for LiveDebugValues implementations.
+extern LDVImpl *makeVarLocBasedLiveDebugValues();
+extern LDVImpl *makeInstrRefBasedLiveDebugValues();
+} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
index 07a275b546f6..e2daa46fe6b9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
@@ -1,4 +1,4 @@
-//===- LiveDebugValues.cpp - Tracking Debug Value MIs ---------------------===//
+//===- VarLocBasedImpl.cpp - Tracking Debug Value MIs with VarLoc class----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
///
-/// \file LiveDebugValues.cpp
+/// \file VarLocBasedImpl.cpp
///
/// LiveDebugValues is an optimistic "available expressions" dataflow
/// algorithm. The set of expressions is the set of machine locations
@@ -17,7 +17,12 @@
/// DebugVariable, and continues until that location is clobbered or
/// re-specified by a different DBG_VALUE for the same DebugVariable.
///
-/// The cannonical "available expressions" problem doesn't have expression
+/// The output of LiveDebugValues is additional DBG_VALUE instructions,
+/// placed to extend variable locations as far they're available. This file
+/// and the VarLocBasedLDV class is an implementation that explicitly tracks
+/// locations, using the VarLoc class.
+///
+/// The canonical "available expressions" problem doesn't have expression
/// clobbering, instead when a variable is re-assigned, any expressions using
/// that variable get invalidated. LiveDebugValues can map onto "available
/// expressions" by having every register represented by a variable, which is
@@ -101,6 +106,8 @@
///
//===----------------------------------------------------------------------===//
+#include "LiveDebugValues.h"
+
#include "llvm/ADT/CoalescingBitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
@@ -138,6 +145,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/TypeSize.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
@@ -264,11 +272,12 @@ struct LocIndex {
}
};
-class LiveDebugValues : public MachineFunctionPass {
+class VarLocBasedLDV : public LDVImpl {
private:
const TargetRegisterInfo *TRI;
const TargetInstrInfo *TII;
const TargetFrameLowering *TFI;
+ TargetPassConfig *TPC;
BitVector CalleeSavedRegs;
LexicalScopes LS;
VarLocSet::Allocator Alloc;
@@ -284,7 +293,7 @@ private:
// register and an offset.
struct SpillLoc {
unsigned SpillBase;
- int SpillOffset;
+ StackOffset SpillOffset;
bool operator==(const SpillLoc &Other) const {
return SpillBase == Other.SpillBase && SpillOffset == Other.SpillOffset;
}
@@ -315,21 +324,20 @@ private:
/// The value location. Stored separately to avoid repeatedly
/// extracting it from MI.
- union {
+ union LocUnion {
uint64_t RegNo;
SpillLoc SpillLocation;
uint64_t Hash;
int64_t Immediate;
const ConstantFP *FPImm;
const ConstantInt *CImm;
+ LocUnion() : Hash(0) {}
} Loc;
VarLoc(const MachineInstr &MI, LexicalScopes &LS)
: Var(MI.getDebugVariable(), MI.getDebugExpression(),
MI.getDebugLoc()->getInlinedAt()),
Expr(MI.getDebugExpression()), MI(MI) {
- static_assert((sizeof(Loc) == sizeof(uint64_t)),
- "hash does not cover all members of Loc");
assert(MI.isDebugValue() && "not a DBG_VALUE");
assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE");
if (int RegNo = isDbgValueDescribedByReg(MI)) {
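// Editorial sketch (not part of the patch): presumably why the union gains a
// name and an explicit `LocUnion() : Hash(0) {}`. Once a variant member
// (SpillLoc, via StackOffset's in-class initializers) is no longer trivially
// default-constructible, the union's implicit default constructor is
// deleted. A minimal reproduction:
#include <cstdint>

struct NonTrivialMember {
  int64_t Fixed = 0; // in-class initializer makes the default ctor non-trivial
};

union LocLike {
  uint64_t Hash;
  NonTrivialMember Spill;
  LocLike() : Hash(0) {} // required; the implicit ctor would be deleted
};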
@@ -405,7 +413,7 @@ private:
/// Take the variable described by DBG_VALUE MI, and create a VarLoc
/// locating it in the specified spill location.
static VarLoc CreateSpillLoc(const MachineInstr &MI, unsigned SpillBase,
- int SpillOffset, LexicalScopes &LS) {
+ StackOffset SpillOffset, LexicalScopes &LS) {
VarLoc VL(MI, LS);
assert(VL.Kind == RegisterKind);
VL.Kind = SpillLocKind;
@@ -442,7 +450,8 @@ private:
// Use the original DBG_VALUE's expression to build the spill location
// on top of. FIXME: spill locations created before this pass runs
// are not recognized, and not handled here.
- auto *SpillExpr = DIExpression::prepend(
+ auto *TRI = MF.getSubtarget().getRegisterInfo();
+ auto *SpillExpr = TRI->prependOffsetExpression(
DIExpr, DIExpression::ApplyOffset, Loc.SpillLocation.SpillOffset);
unsigned Base = Loc.SpillLocation.SpillBase;
return BuildMI(MF, DbgLoc, IID, true, Base, Var, SpillExpr);
@@ -457,7 +466,7 @@ private:
llvm_unreachable(
"Tried to produce DBG_VALUE for invalid or backup VarLoc");
}
- llvm_unreachable("Unrecognized LiveDebugValues.VarLoc.Kind enum");
+ llvm_unreachable("Unrecognized VarLocBasedLDV.VarLoc.Kind enum");
}
/// Is the Loc field a constant or constant object?
@@ -511,7 +520,9 @@ private:
break;
case SpillLocKind:
Out << printReg(Loc.SpillLocation.SpillBase, TRI);
- Out << "[" << Loc.SpillLocation.SpillOffset << "]";
+ Out << "[" << Loc.SpillLocation.SpillOffset.getFixed() << " + "
+ << Loc.SpillLocation.SpillOffset.getScalable() << "x vscale"
+ << "]";
break;
case ImmediateKind:
Out << Loc.Immediate;
@@ -534,14 +545,46 @@ private:
#endif
bool operator==(const VarLoc &Other) const {
- return Kind == Other.Kind && Var == Other.Var &&
- Loc.Hash == Other.Loc.Hash && Expr == Other.Expr;
+ if (Kind != Other.Kind || !(Var == Other.Var) || Expr != Other.Expr)
+ return false;
+
+ switch (Kind) {
+ case SpillLocKind:
+ return Loc.SpillLocation == Other.Loc.SpillLocation;
+ case RegisterKind:
+ case ImmediateKind:
+ case EntryValueKind:
+ case EntryValueBackupKind:
+ case EntryValueCopyBackupKind:
+ return Loc.Hash == Other.Loc.Hash;
+ default:
+ llvm_unreachable("Invalid kind");
+ }
}
/// This operator guarantees that VarLocs are sorted by Variable first.
bool operator<(const VarLoc &Other) const {
- return std::tie(Var, Kind, Loc.Hash, Expr) <
- std::tie(Other.Var, Other.Kind, Other.Loc.Hash, Other.Expr);
+ switch (Kind) {
+ case SpillLocKind:
+ return std::make_tuple(Var, Kind, Loc.SpillLocation.SpillBase,
+ Loc.SpillLocation.SpillOffset.getFixed(),
+ Loc.SpillLocation.SpillOffset.getScalable(),
+ Expr) <
+ std::make_tuple(
+ Other.Var, Other.Kind, Other.Loc.SpillLocation.SpillBase,
+ Other.Loc.SpillLocation.SpillOffset.getFixed(),
+ Other.Loc.SpillLocation.SpillOffset.getScalable(),
+ Other.Expr);
+ case RegisterKind:
+ case ImmediateKind:
+ case EntryValueKind:
+ case EntryValueBackupKind:
+ case EntryValueCopyBackupKind:
+ return std::tie(Var, Kind, Loc.Hash, Expr) <
+ std::tie(Other.Var, Other.Kind, Other.Loc.Hash, Other.Expr);
+ default:
+ llvm_unreachable("Invalid kind");
+ }
}
};
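// Editorial sketch (not part of the patch): the old code compared VarLocs
// through Loc.Hash alone, with a static_assert pinning sizeof(Loc) to 8
// bytes. A two-part StackOffset makes SpillLoc wider than the hash, so
// equality and ordering now switch on the kind and compare the spill fields
// individually. In miniature (a struct stands in for the union, for brevity):
#include <cstdint>
#include <tuple>

enum class LocKind { Register, Spill };

struct MiniLoc {
  LocKind Kind = LocKind::Register;
  uint64_t Hash = 0;               // meaningful for Register
  int64_t Fixed = 0, Scalable = 0; // meaningful for Spill; wider than Hash

  bool operator==(const MiniLoc &O) const {
    if (Kind != O.Kind)
      return false;
    if (Kind == LocKind::Spill) // the 64-bit hash cannot cover this
      return std::tie(Fixed, Scalable) == std::tie(O.Fixed, O.Scalable);
    return Hash == O.Hash;
  }
  bool operator<(const MiniLoc &O) const {
    if (Kind == LocKind::Spill && O.Kind == LocKind::Spill)
      return std::tie(Fixed, Scalable) < std::tie(O.Fixed, O.Scalable);
    return std::tie(Kind, Hash) < std::tie(O.Kind, O.Hash);
  }
};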
@@ -793,30 +836,18 @@ private:
/// had their instruction creation deferred.
void flushPendingLocs(VarLocInMBB &PendingInLocs, VarLocMap &VarLocIDs);
- bool ExtendRanges(MachineFunction &MF);
+ bool ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) override;
public:
- static char ID;
-
/// Default construct and initialize the pass.
- LiveDebugValues();
+ VarLocBasedLDV();
- /// Tell the pass manager which passes we depend on and what
- /// information we preserve.
- void getAnalysisUsage(AnalysisUsage &AU) const override;
-
- MachineFunctionProperties getRequiredProperties() const override {
- return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::NoVRegs);
- }
+ ~VarLocBasedLDV();
/// Print to ostream with a message.
void printVarLocInMBB(const MachineFunction &MF, const VarLocInMBB &V,
const VarLocMap &VarLocIDs, const char *msg,
raw_ostream &Out) const;
-
- /// Calculate the liveness information for the given machine function.
- bool runOnMachineFunction(MachineFunction &MF) override;
};
} // end anonymous namespace
@@ -825,31 +856,16 @@ public:
// Implementation
//===----------------------------------------------------------------------===//
-char LiveDebugValues::ID = 0;
-
-char &llvm::LiveDebugValuesID = LiveDebugValues::ID;
-
-INITIALIZE_PASS(LiveDebugValues, DEBUG_TYPE, "Live DEBUG_VALUE analysis",
- false, false)
+VarLocBasedLDV::VarLocBasedLDV() { }
-/// Default construct and initialize the pass.
-LiveDebugValues::LiveDebugValues() : MachineFunctionPass(ID) {
- initializeLiveDebugValuesPass(*PassRegistry::getPassRegistry());
-}
-
-/// Tell the pass manager which passes we depend on and what information we
-/// preserve.
-void LiveDebugValues::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
-}
+VarLocBasedLDV::~VarLocBasedLDV() { }
/// Erase a variable from the set of open ranges, and additionally erase any
-/// fragments that may overlap it. If the VarLoc is a buckup location, erase
+/// fragments that may overlap it. If the VarLoc is a backup location, erase
/// the variable from the EntryValuesBackupVars set, indicating we should stop
/// tracking its backup entry location. Otherwise, if the VarLoc is a primary
/// location, erase the variable from the Vars set.
-void LiveDebugValues::OpenRangesSet::erase(const VarLoc &VL) {
+void VarLocBasedLDV::OpenRangesSet::erase(const VarLoc &VL) {
// Erasure helper.
auto DoErase = [VL, this](DebugVariable VarToErase) {
auto *EraseFrom = VL.isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars;
@@ -875,15 +891,15 @@ void LiveDebugValues::OpenRangesSet::erase(const VarLoc &VL) {
auto MapIt = OverlappingFragments.find({Var.getVariable(), ThisFragment});
if (MapIt != OverlappingFragments.end()) {
for (auto Fragment : MapIt->second) {
- LiveDebugValues::OptFragmentInfo FragmentHolder;
+ VarLocBasedLDV::OptFragmentInfo FragmentHolder;
if (!DebugVariable::isDefaultFragment(Fragment))
- FragmentHolder = LiveDebugValues::OptFragmentInfo(Fragment);
+ FragmentHolder = VarLocBasedLDV::OptFragmentInfo(Fragment);
DoErase({Var.getVariable(), FragmentHolder, Var.getInlinedAt()});
}
}
}
-void LiveDebugValues::OpenRangesSet::erase(const VarLocSet &KillSet,
+void VarLocBasedLDV::OpenRangesSet::erase(const VarLocSet &KillSet,
const VarLocMap &VarLocIDs) {
VarLocs.intersectWithComplement(KillSet);
for (uint64_t ID : KillSet) {
@@ -893,7 +909,7 @@ void LiveDebugValues::OpenRangesSet::erase(const VarLocSet &KillSet,
}
}
-void LiveDebugValues::OpenRangesSet::insert(LocIndex VarLocID,
+void VarLocBasedLDV::OpenRangesSet::insert(LocIndex VarLocID,
const VarLoc &VL) {
auto *InsertInto = VL.isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars;
VarLocs.set(VarLocID.getAsRawInteger());
@@ -903,7 +919,7 @@ void LiveDebugValues::OpenRangesSet::insert(LocIndex VarLocID,
/// Return the Loc ID of an entry value backup location, if it exists for the
/// variable.
llvm::Optional<LocIndex>
-LiveDebugValues::OpenRangesSet::getEntryValueBackup(DebugVariable Var) {
+VarLocBasedLDV::OpenRangesSet::getEntryValueBackup(DebugVariable Var) {
auto It = EntryValuesBackupVars.find(Var);
if (It != EntryValuesBackupVars.end())
return It->second;
@@ -911,7 +927,7 @@ LiveDebugValues::OpenRangesSet::getEntryValueBackup(DebugVariable Var) {
return llvm::None;
}
-void LiveDebugValues::collectIDsForRegs(VarLocSet &Collected,
+void VarLocBasedLDV::collectIDsForRegs(VarLocSet &Collected,
const DefinedRegsSet &Regs,
const VarLocSet &CollectFrom) const {
assert(!Regs.empty() && "Nothing to collect");
@@ -937,7 +953,7 @@ void LiveDebugValues::collectIDsForRegs(VarLocSet &Collected,
}
}
-void LiveDebugValues::getUsedRegs(const VarLocSet &CollectFrom,
+void VarLocBasedLDV::getUsedRegs(const VarLocSet &CollectFrom,
SmallVectorImpl<uint32_t> &UsedRegs) const {
// All register-based VarLocs are assigned indices greater than or equal to
// FirstRegIndex.
@@ -967,7 +983,7 @@ void LiveDebugValues::getUsedRegs(const VarLocSet &CollectFrom,
//===----------------------------------------------------------------------===//
#ifndef NDEBUG
-void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF,
+void VarLocBasedLDV::printVarLocInMBB(const MachineFunction &MF,
const VarLocInMBB &V,
const VarLocMap &VarLocIDs,
const char *msg,
@@ -991,8 +1007,8 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF,
}
#endif
-LiveDebugValues::VarLoc::SpillLoc
-LiveDebugValues::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
+VarLocBasedLDV::VarLoc::SpillLoc
+VarLocBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
assert(MI.hasOneMemOperand() &&
"Spill instruction does not have exactly one memory operand?");
auto MMOI = MI.memoperands_begin();
@@ -1002,14 +1018,14 @@ LiveDebugValues::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
int FI = cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex();
const MachineBasicBlock *MBB = MI.getParent();
Register Reg;
- int Offset = TFI->getFrameIndexReference(*MBB->getParent(), FI, Reg);
+ StackOffset Offset = TFI->getFrameIndexReference(*MBB->getParent(), FI, Reg);
return {Reg, Offset};
}
/// Try to salvage the debug entry value if we encounter a new debug value
/// describing the same parameter, otherwise stop tracking the value. Return
/// true if we should stop tracking the entry value, otherwise return false.
-bool LiveDebugValues::removeEntryValue(const MachineInstr &MI,
+bool VarLocBasedLDV::removeEntryValue(const MachineInstr &MI,
OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs,
const VarLoc &EntryVL) {
@@ -1061,7 +1077,7 @@ bool LiveDebugValues::removeEntryValue(const MachineInstr &MI,
/// End all previous ranges related to @MI and start a new range from @MI
/// if it is a DBG_VALUE instr.
-void LiveDebugValues::transferDebugValue(const MachineInstr &MI,
+void VarLocBasedLDV::transferDebugValue(const MachineInstr &MI,
OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs) {
if (!MI.isDebugValue())
@@ -1112,7 +1128,7 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI,
}
/// Turn the entry value backup locations into primary locations.
-void LiveDebugValues::emitEntryValues(MachineInstr &MI,
+void VarLocBasedLDV::emitEntryValues(MachineInstr &MI,
OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs,
TransferMap &Transfers,
@@ -1150,7 +1166,7 @@ void LiveDebugValues::emitEntryValues(MachineInstr &MI,
/// new VarLoc. If \p NewReg is different from the default zero value then the
/// new location will be a register location created by the copy-like
/// instruction; otherwise it is the variable's location on the stack.
-void LiveDebugValues::insertTransferDebugPair(
+void VarLocBasedLDV::insertTransferDebugPair(
MachineInstr &MI, OpenRangesSet &OpenRanges, TransferMap &Transfers,
VarLocMap &VarLocIDs, LocIndex OldVarID, TransferKind Kind,
Register NewReg) {
@@ -1217,7 +1233,7 @@ void LiveDebugValues::insertTransferDebugPair(
}
/// A definition of a register may mark the end of a range.
-void LiveDebugValues::transferRegisterDef(
+void VarLocBasedLDV::transferRegisterDef(
MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs,
TransferMap &Transfers) {
@@ -1278,14 +1294,14 @@ void LiveDebugValues::transferRegisterDef(
collectIDsForRegs(KillSet, DeadRegs, OpenRanges.getVarLocs());
OpenRanges.erase(KillSet, VarLocIDs);
- if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
+ if (TPC) {
auto &TM = TPC->getTM<TargetMachine>();
if (TM.Options.ShouldEmitDebugEntryValues())
emitEntryValues(MI, OpenRanges, VarLocIDs, Transfers, KillSet);
}
}
-bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI,
+bool VarLocBasedLDV::isSpillInstruction(const MachineInstr &MI,
MachineFunction *MF) {
// TODO: Handle multiple stores folded into one.
if (!MI.hasOneMemOperand())
@@ -1298,7 +1314,7 @@ bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI,
return true;
}
-bool LiveDebugValues::isLocationSpill(const MachineInstr &MI,
+bool VarLocBasedLDV::isLocationSpill(const MachineInstr &MI,
MachineFunction *MF, Register &Reg) {
if (!isSpillInstruction(MI, MF))
return false;
@@ -1338,8 +1354,8 @@ bool LiveDebugValues::isLocationSpill(const MachineInstr &MI,
return false;
}
-Optional<LiveDebugValues::VarLoc::SpillLoc>
-LiveDebugValues::isRestoreInstruction(const MachineInstr &MI,
+Optional<VarLocBasedLDV::VarLoc::SpillLoc>
+VarLocBasedLDV::isRestoreInstruction(const MachineInstr &MI,
MachineFunction *MF, Register &Reg) {
if (!MI.hasOneMemOperand())
return None;
@@ -1360,7 +1376,7 @@ LiveDebugValues::isRestoreInstruction(const MachineInstr &MI,
/// the DBG_VALUE without inserting it and keep track of it in \p Transfers.
/// It will be inserted into the BB when we're done iterating over the
/// instructions.
-void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI,
+void VarLocBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI,
OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs,
TransferMap &Transfers) {
@@ -1449,7 +1465,7 @@ void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI,
/// If \p MI is a register copy instruction, that copies a previously tracked
/// value from one register to another register that is callee saved, we
/// create new DBG_VALUE instruction described with copy destination register.
-void LiveDebugValues::transferRegisterCopy(MachineInstr &MI,
+void VarLocBasedLDV::transferRegisterCopy(MachineInstr &MI,
OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs,
TransferMap &Transfers) {
@@ -1519,7 +1535,7 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI,
}
/// Terminate all open ranges at the end of the current basic block.
-bool LiveDebugValues::transferTerminator(MachineBasicBlock *CurMBB,
+bool VarLocBasedLDV::transferTerminator(MachineBasicBlock *CurMBB,
OpenRangesSet &OpenRanges,
VarLocInMBB &OutLocs,
const VarLocMap &VarLocIDs) {
@@ -1551,7 +1567,7 @@ bool LiveDebugValues::transferTerminator(MachineBasicBlock *CurMBB,
/// Variable which are known to exist.
/// \param OverlappingFragments The overlap map being constructed, from one
/// Var/Fragment pair to a vector of fragments known to overlap.
-void LiveDebugValues::accumulateFragmentMap(MachineInstr &MI,
+void VarLocBasedLDV::accumulateFragmentMap(MachineInstr &MI,
VarToFragments &SeenFragments,
OverlapMap &OverlappingFragments) {
DebugVariable MIVar(MI.getDebugVariable(), MI.getDebugExpression(),
@@ -1603,7 +1619,7 @@ void LiveDebugValues::accumulateFragmentMap(MachineInstr &MI,
}
/// This routine creates OpenRanges.
-void LiveDebugValues::process(MachineInstr &MI, OpenRangesSet &OpenRanges,
+void VarLocBasedLDV::process(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs, TransferMap &Transfers) {
transferDebugValue(MI, OpenRanges, VarLocIDs);
transferRegisterDef(MI, OpenRanges, VarLocIDs, Transfers);
@@ -1614,7 +1630,7 @@ void LiveDebugValues::process(MachineInstr &MI, OpenRangesSet &OpenRanges,
/// This routine joins the analysis results of all incoming edges in @MBB by
/// inserting a new DBG_VALUE instruction at the start of @MBB - if the same
/// source variable in all the predecessors of @MBB resides in the same location.
-bool LiveDebugValues::join(
+bool VarLocBasedLDV::join(
MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs,
const VarLocMap &VarLocIDs,
SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
@@ -1697,7 +1713,7 @@ bool LiveDebugValues::join(
return Changed;
}
-void LiveDebugValues::flushPendingLocs(VarLocInMBB &PendingInLocs,
+void VarLocBasedLDV::flushPendingLocs(VarLocInMBB &PendingInLocs,
VarLocMap &VarLocIDs) {
// PendingInLocs records all locations propagated into blocks, which have
// not had DBG_VALUE insts created. Go through and create those insts now.
@@ -1721,7 +1737,7 @@ void LiveDebugValues::flushPendingLocs(VarLocInMBB &PendingInLocs,
}
}
-bool LiveDebugValues::isEntryValueCandidate(
+bool VarLocBasedLDV::isEntryValueCandidate(
const MachineInstr &MI, const DefinedRegsSet &DefinedRegs) const {
assert(MI.isDebugValue() && "This must be DBG_VALUE.");
@@ -1770,11 +1786,11 @@ static void collectRegDefs(const MachineInstr &MI, DefinedRegsSet &Regs,
/// This routine records the entry values of function parameters. The values
/// could be used as backup values. If we loose the track of some unmodified
/// parameters, the backup values will be used as a primary locations.
-void LiveDebugValues::recordEntryValue(const MachineInstr &MI,
+void VarLocBasedLDV::recordEntryValue(const MachineInstr &MI,
const DefinedRegsSet &DefinedRegs,
OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs) {
- if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
+ if (TPC) {
auto &TM = TPC->getTM<TargetMachine>();
if (!TM.Options.ShouldEmitDebugEntryValues())
return;
@@ -1800,9 +1816,25 @@ void LiveDebugValues::recordEntryValue(const MachineInstr &MI,
/// Calculate the liveness information for the given machine function and
/// extend ranges across basic blocks.
-bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
+bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) {
LLVM_DEBUG(dbgs() << "\nDebug Range Extension\n");
+ if (!MF.getFunction().getSubprogram())
+ // VarLocBasedLDV will already have removed all DBG_VALUEs.
+ return false;
+
+ // Skip functions from NoDebug compilation units.
+ if (MF.getFunction().getSubprogram()->getUnit()->getEmissionKind() ==
+ DICompileUnit::NoDebug)
+ return false;
+
+ TRI = MF.getSubtarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ TFI = MF.getSubtarget().getFrameLowering();
+ TFI->getCalleeSaves(MF, CalleeSavedRegs);
+ this->TPC = TPC;
+ LS.initialize(MF);
+
bool Changed = false;
bool OLChanged = false;
bool MBBJoined = false;
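// Editorial sketch (not part of the patch): runOnMachineFunction's guards
// and setup moved into the virtual ExtendRanges entry point, with
// TargetPassConfig passed in by the wrapper pass instead of fetched through
// getAnalysisIfAvailable<>. The control flow, reduced to its shape
// (FuncInfo and PassConfig are hypothetical stand-ins):
struct FuncInfo {
  bool HasSubprogram; // models getSubprogram() != nullptr
  bool NoDebugUnit;   // models emission kind == DICompileUnit::NoDebug
};
struct PassConfig {}; // may be null when entry values can't be checked

bool extendRangesShape(const FuncInfo &MF, PassConfig *TPC) {
  if (!MF.HasSubprogram)
    return false; // DBG_VALUEs were already stripped for this function
  if (MF.NoDebugUnit)
    return false; // skip functions from NoDebug compilation units
  // ... cache TRI/TII/TFI and TPC, initialize lexical scopes, run dataflow ...
  (void)TPC;
  return false; // placeholder for the real "Changed" result
}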
@@ -1840,8 +1872,8 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
MachineBasicBlock &First_MBB = *(MF.begin());
for (auto &MI : First_MBB) {
collectRegDefs(MI, DefinedRegs, TRI);
- if (MI.isDebugValue())
- recordEntryValue(MI, DefinedRegs, OpenRanges, VarLocIDs);
+ if (MI.isDebugValue())
+ recordEntryValue(MI, DefinedRegs, OpenRanges, VarLocIDs);
}
// Initialize per-block structures and scan for fragment overlaps.
@@ -1878,7 +1910,7 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
if (MI.isDebugValue())
++NumInputDbgValues;
if (NumInputDbgValues > InputDbgValueLimit) {
- LLVM_DEBUG(dbgs() << "Disabling LiveDebugValues: " << MF.getName()
+ LLVM_DEBUG(dbgs() << "Disabling VarLocBasedLDV: " << MF.getName()
<< " has " << RPONumber << " basic blocks and "
<< NumInputDbgValues
<< " input DBG_VALUEs, exceeding limits.\n");
@@ -1955,22 +1987,8 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
return Changed;
}
-bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
- if (!MF.getFunction().getSubprogram())
- // LiveDebugValues will already have removed all DBG_VALUEs.
- return false;
-
- // Skip functions from NoDebug compilation units.
- if (MF.getFunction().getSubprogram()->getUnit()->getEmissionKind() ==
- DICompileUnit::NoDebug)
- return false;
-
- TRI = MF.getSubtarget().getRegisterInfo();
- TII = MF.getSubtarget().getInstrInfo();
- TFI = MF.getSubtarget().getFrameLowering();
- TFI->getCalleeSaves(MF, CalleeSavedRegs);
- LS.initialize(MF);
-
- bool Changed = ExtendRanges(MF);
- return Changed;
+LDVImpl *
+llvm::makeVarLocBasedLiveDebugValues()
+{
+ return new VarLocBasedLDV();
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
index 158e873370b1..2325341070a3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -54,7 +54,6 @@
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -96,6 +95,7 @@ LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID) {
enum : unsigned { UndefLocNo = ~0U };
+namespace {
/// Describes a debug variable value by location number and expression along
/// with some flags about the original usage of the location.
class DbgVariableValue {
@@ -136,6 +136,7 @@ private:
unsigned WasIndirect : 1;
const DIExpression *Expression = nullptr;
};
+} // namespace
/// Map of where a user value is live to that value.
using LocMap = IntervalMap<SlotIndex, DbgVariableValue, 4>;
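// Editorial sketch (not part of the patch): the `namespace { ... }` wrapper
// added around DbgVariableValue gives the class internal linkage, so an
// identically named type in another translation unit can no longer collide
// with it or violate the ODR. The minimal form of the idiom:
namespace {
struct FileLocalType { // visible to this .cpp only
  unsigned LocNo = 0;
};
} // namespace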
@@ -394,6 +395,11 @@ class LDVImpl {
LiveIntervals *LIS;
const TargetRegisterInfo *TRI;
+ using StashedInstrRef =
+ std::tuple<unsigned, unsigned, const DILocalVariable *,
+ const DIExpression *, DebugLoc>;
+ std::map<SlotIndex, std::vector<StashedInstrRef>> StashedInstrReferences;
+
/// Whether emitDebugValues is called.
bool EmitDone = false;
@@ -430,6 +436,16 @@ class LDVImpl {
/// \returns True if the DBG_VALUE instruction should be deleted.
bool handleDebugValue(MachineInstr &MI, SlotIndex Idx);
+ /// Track a DBG_INSTR_REF. This needs to be removed from the MachineFunction
+ /// during regalloc -- but there's no need to maintain live ranges, as we
+ /// refer to a value rather than a location.
+ ///
+ /// \param MI DBG_INSTR_REF instruction
+ /// \param Idx Last valid SlotIndex before instruction
+ ///
+ /// \returns True if the DBG_INSTR_REF instruction should be deleted.
+ bool handleDebugInstrRef(MachineInstr &MI, SlotIndex Idx);
+
/// Add DBG_LABEL instruction to UserLabel.
///
/// \param MI DBG_LABEL instruction
@@ -458,6 +474,7 @@ public:
/// Release all memory.
void clear() {
MF = nullptr;
+ StashedInstrReferences.clear();
userValues.clear();
userLabels.clear();
virtRegToEqClass.clear();
@@ -665,6 +682,19 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
return true;
}
+bool LDVImpl::handleDebugInstrRef(MachineInstr &MI, SlotIndex Idx) {
+ assert(MI.isDebugRef());
+ unsigned InstrNum = MI.getOperand(0).getImm();
+ unsigned OperandNum = MI.getOperand(1).getImm();
+ auto *Var = MI.getDebugVariable();
+ auto *Expr = MI.getDebugExpression();
+ auto &DL = MI.getDebugLoc();
+ StashedInstrRef Stashed =
+ std::make_tuple(InstrNum, OperandNum, Var, Expr, DL);
+ StashedInstrReferences[Idx].push_back(Stashed);
+ return true;
+}
+
bool LDVImpl::handleDebugLabel(MachineInstr &MI, SlotIndex Idx) {
// DBG_LABEL label
if (MI.getNumOperands() != 1 || !MI.getOperand(0).isMetadata()) {
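// Editorial sketch (not part of the patch): the stash-and-replay idea behind
// handleDebugInstrRef, over standard containers. A DBG_INSTR_REF names a
// (instruction, operand) value rather than a machine location, so register
// allocation only needs to pull it out and re-insert it at the same slot
// afterwards -- no live-range bookkeeping. SlotIndex is reduced to unsigned.
#include <map>
#include <string>
#include <tuple>
#include <vector>

using Stashed = std::tuple<unsigned, unsigned, std::string>; // instr#, op#, var
std::map<unsigned, std::vector<Stashed>> StashedRefs;        // keyed by slot

void stash(unsigned Idx, unsigned InstrNum, unsigned OpNum,
           const std::string &Var) {
  StashedRefs[Idx].emplace_back(InstrNum, OpNum, Var);
}

void replay() {
  // std::map iterates in slot order and each vector preserves insertion
  // order, mirroring the "Ordering is preserved by the vector" comment in
  // emitDebugValues further down.
  for (const auto &P : StashedRefs)
    for (const auto &S : P.second)
      (void)std::get<0>(S); // a real replay would rebuild the instruction here
}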
@@ -712,6 +742,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) {
// Only handle DBG_VALUE, DBG_INSTR_REF and DBG_LABEL here. Skip all other
// kinds of debug instructions.
if ((MBBI->isDebugValue() && handleDebugValue(*MBBI, Idx)) ||
+ (MBBI->isDebugRef() && handleDebugInstrRef(*MBBI, Idx)) ||
(MBBI->isDebugLabel() && handleDebugLabel(*MBBI, Idx))) {
MBBI = MBB->erase(MBBI);
Changed = true;
@@ -775,12 +806,12 @@ void UserValue::addDefsFromCopies(
if (Kills.empty())
return;
// Don't track copies from physregs, there are too many uses.
- if (!Register::isVirtualRegister(LI->reg))
+ if (!Register::isVirtualRegister(LI->reg()))
return;
// Collect all the (vreg, valno) pairs that are copies of LI.
SmallVector<std::pair<LiveInterval*, const VNInfo*>, 8> CopyValues;
- for (MachineOperand &MO : MRI.use_nodbg_operands(LI->reg)) {
+ for (MachineOperand &MO : MRI.use_nodbg_operands(LI->reg())) {
MachineInstr *MI = MO.getParent();
// Copies of the full value.
if (MO.getSubReg() || !MI->isCopy())
@@ -991,10 +1022,10 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
return Changed;
}
-static void removeDebugValues(MachineFunction &mf) {
+static void removeDebugInstrs(MachineFunction &mf) {
for (MachineBasicBlock &MBB : mf) {
for (auto MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE; ) {
- if (!MBBI->isDebugValue()) {
+ if (!MBBI->isDebugInstr()) {
++MBBI;
continue;
}
@@ -1007,7 +1038,7 @@ bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) {
if (!EnableLDV)
return false;
if (!mf.getFunction().getSubprogram()) {
- removeDebugValues(mf);
+ removeDebugInstrs(mf);
return false;
}
if (!pImpl)
@@ -1064,7 +1095,7 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<Register> NewRegs,
LII->start < LocMapI.stop()) {
// Overlapping correct location. Allocate NewLocNo now.
if (NewLocNo == UndefLocNo) {
- MachineOperand MO = MachineOperand::CreateReg(LI->reg, false);
+ MachineOperand MO = MachineOperand::CreateReg(LI->reg(), false);
MO.setSubReg(locations[OldLocNo].getSubReg());
NewLocNo = getLocationNo(MO);
DidChange = true;
@@ -1434,6 +1465,28 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
LLVM_DEBUG(userLabel->print(dbgs(), TRI));
userLabel->emitDebugLabel(*LIS, *TII);
}
+
+ LLVM_DEBUG(dbgs() << "********** EMITTING INSTR REFERENCES **********\n");
+
+ // Re-insert any DBG_INSTR_REFs back into the positions they previously
+ // occupied. Ordering is preserved by the vector.
+ auto Slots = LIS->getSlotIndexes();
+ const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_INSTR_REF);
+ for (auto &P : StashedInstrReferences) {
+ const SlotIndex &Idx = P.first;
+ auto *MBB = Slots->getMBBFromIndex(Idx);
+ MachineBasicBlock::iterator insertPos = findInsertLocation(MBB, Idx, *LIS);
+ for (auto &Stashed : P.second) {
+ auto MIB = BuildMI(*MF, std::get<4>(Stashed), RefII);
+ MIB.addImm(std::get<0>(Stashed));
+ MIB.addImm(std::get<1>(Stashed));
+ MIB.addMetadata(std::get<2>(Stashed));
+ MIB.addMetadata(std::get<3>(Stashed));
+ MachineInstr *New = MIB;
+ MBB->insert(insertPos, New);
+ }
+ }
+
EmitDone = true;
}
@@ -1442,10 +1495,6 @@ void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) {
static_cast<LDVImpl*>(pImpl)->emitDebugValues(VRM);
}
-bool LiveDebugVariables::doInitialization(Module &M) {
- return Pass::doInitialization(M);
-}
-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void LiveDebugVariables::dump() const {
if (pImpl)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h
index 74e738ec3e56..07dd3a83866f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.h
@@ -56,7 +56,6 @@ private:
bool runOnMachineFunction(MachineFunction &) override;
void releaseMemory() override;
void getAnalysisUsage(AnalysisUsage &) const override;
- bool doInitialization(Module &) override;
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp
index 930dc116205a..ce0e58772068 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp
@@ -951,9 +951,9 @@ void LiveInterval::refineSubRanges(
MatchingRange = createSubRangeFrom(Allocator, Matching, SR);
// Now that the subrange is split in half, make sure we
// only keep in the subranges the VNIs that touch the related half.
- stripValuesNotDefiningMask(reg, *MatchingRange, Matching, Indexes, TRI,
+ stripValuesNotDefiningMask(reg(), *MatchingRange, Matching, Indexes, TRI,
ComposeSubRegIdx);
- stripValuesNotDefiningMask(reg, SR, SR.LaneMask, Indexes, TRI,
+ stripValuesNotDefiningMask(reg(), SR, SR.LaneMask, Indexes, TRI,
ComposeSubRegIdx);
}
Apply(*MatchingRange);
@@ -977,11 +977,11 @@ void LiveInterval::computeSubRangeUndefs(SmallVectorImpl<SlotIndex> &Undefs,
LaneBitmask LaneMask,
const MachineRegisterInfo &MRI,
const SlotIndexes &Indexes) const {
- assert(Register::isVirtualRegister(reg));
- LaneBitmask VRegMask = MRI.getMaxLaneMaskForVReg(reg);
+ assert(Register::isVirtualRegister(reg()));
+ LaneBitmask VRegMask = MRI.getMaxLaneMaskForVReg(reg());
assert((VRegMask & LaneMask).any());
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
- for (const MachineOperand &MO : MRI.def_operands(reg)) {
+ for (const MachineOperand &MO : MRI.def_operands(reg())) {
if (!MO.isUndef())
continue;
unsigned SubReg = MO.getSubReg();
@@ -1043,12 +1043,12 @@ void LiveInterval::SubRange::print(raw_ostream &OS) const {
}
void LiveInterval::print(raw_ostream &OS) const {
- OS << printReg(reg) << ' ';
+ OS << printReg(reg()) << ' ';
super::print(OS);
// Print subranges
for (const SubRange &SR : subranges())
OS << SR;
- OS << " weight:" << weight;
+ OS << " weight:" << Weight;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1087,7 +1087,7 @@ void LiveInterval::verify(const MachineRegisterInfo *MRI) const {
// Make sure SubRanges are fine and LaneMasks are disjunct.
LaneBitmask Mask;
- LaneBitmask MaxMask = MRI != nullptr ? MRI->getMaxLaneMaskForVReg(reg)
+ LaneBitmask MaxMask = MRI != nullptr ? MRI->getMaxLaneMaskForVReg(reg())
: LaneBitmask::getAll();
for (const SubRange &SR : subranges()) {
// Subrange lanemask should be disjunct to any previous subrange masks.
@@ -1361,8 +1361,9 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveRange &LR) {
void ConnectedVNInfoEqClasses::Distribute(LiveInterval &LI, LiveInterval *LIV[],
MachineRegisterInfo &MRI) {
// Rewrite instructions.
- for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LI.reg),
- RE = MRI.reg_end(); RI != RE;) {
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LI.reg()),
+ RE = MRI.reg_end();
+ RI != RE;) {
MachineOperand &MO = *RI;
MachineInstr *MI = RI->getParent();
++RI;
@@ -1382,7 +1383,7 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval &LI, LiveInterval *LIV[],
if (!VNI)
continue;
if (unsigned EqClass = getEqClass(VNI))
- MO.setReg(LIV[EqClass-1]->reg);
+ MO.setReg(LIV[EqClass - 1]->reg());
}
// Distribute subregister liveranges.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp
index 30c2d74a71c5..2756086cb8b1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp
@@ -60,7 +60,7 @@ void LiveIntervalCalc::calculate(LiveInterval &LI, bool TrackSubRegs) {
// Visit all def operands. If the same instruction has multiple defs of Reg,
// createDeadDef() will deduplicate.
const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
- unsigned Reg = LI.reg;
+ unsigned Reg = LI.reg();
for (const MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
if (!MO.isDef() && !MO.readsReg())
continue;
@@ -127,7 +127,7 @@ void LiveIntervalCalc::constructMainRangeFromSubranges(LiveInterval &LI) {
}
}
resetLiveOutMap();
- extendToUses(MainRange, LI.reg, LaneBitmask::getAll(), &LI);
+ extendToUses(MainRange, LI.reg(), LaneBitmask::getAll(), &LI);
}
void LiveIntervalCalc::createDeadDefs(LiveRange &LR, Register Reg) {
@@ -202,4 +202,4 @@ void LiveIntervalCalc::extendToUses(LiveRange &LR, Register Reg,
// reading Reg multiple times. That is OK, extend() is idempotent.
extend(LR, UseIdx, Reg, Undefs);
}
-} \ No newline at end of file
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp
index 43fa8f2d7157..7ccb8df4bc05 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp
@@ -85,8 +85,8 @@ LiveIntervalUnion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
return;
}
for (LiveSegments::const_iterator SI = Segments.begin(); SI.valid(); ++SI) {
- OS << " [" << SI.start() << ' ' << SI.stop() << "):"
- << printReg(SI.value()->reg, TRI);
+ OS << " [" << SI.start() << ' ' << SI.stop()
+ << "):" << printReg(SI.value()->reg(), TRI);
}
OS << '\n';
}
@@ -95,10 +95,20 @@ LiveIntervalUnion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
// Verify the live intervals in this union and add them to the visited set.
void LiveIntervalUnion::verify(LiveVirtRegBitSet& VisitedVRegs) {
for (SegmentIter SI = Segments.begin(); SI.valid(); ++SI)
- VisitedVRegs.set(SI.value()->reg);
+ VisitedVRegs.set(SI.value()->reg());
}
#endif //!NDEBUG
+LiveInterval *LiveIntervalUnion::getOneVReg() const {
+ if (empty())
+ return nullptr;
+ for (LiveSegments::const_iterator SI = Segments.begin(); SI.valid(); ++SI) {
+ // Return the first valid live interval.
+ return SI.value();
+ }
+ return nullptr;
+}
+
// Scan the vector of interfering virtual registers in this union. Assume it's
// quite small.
bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
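// Editorial sketch (not part of the patch): the loop in getOneVReg above
// returns unconditionally on its first iteration, so for a non-empty union
// it reduces to "return the first segment's interval". The same shape over
// any ordered map-like container:
#include <map>

template <typename Map>
const typename Map::mapped_type *firstValueOrNull(const Map &M) {
  if (M.empty())
    return nullptr;
  return &M.begin()->second; // first valid entry, as in getOneVReg
}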
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
index e8ee0599e1a2..a32b486240c8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -37,6 +37,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
@@ -159,7 +160,7 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
// Dump the virtregs.
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = Register::index2VirtReg(i);
+ Register Reg = Register::index2VirtReg(i);
if (hasInterval(Reg))
OS << getInterval(Reg) << '\n';
}
@@ -183,7 +184,7 @@ LLVM_DUMP_METHOD void LiveIntervals::dumpInstrs() const {
}
#endif
-LiveInterval* LiveIntervals::createInterval(unsigned reg) {
+LiveInterval *LiveIntervals::createInterval(Register reg) {
float Weight = Register::isPhysicalRegister(reg) ? huge_valf : 0.0F;
return new LiveInterval(reg, Weight);
}
@@ -193,13 +194,13 @@ bool LiveIntervals::computeVirtRegInterval(LiveInterval &LI) {
assert(LICalc && "LICalc not initialized.");
assert(LI.empty() && "Should only compute empty intervals.");
LICalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
- LICalc->calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg));
+ LICalc->calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg()));
return computeDeadValues(LI, nullptr);
}
void LiveIntervals::computeVirtRegs() {
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = Register::index2VirtReg(i);
+ Register Reg = Register::index2VirtReg(i);
if (MRI->reg_nodbg_empty(Reg))
continue;
LiveInterval &LI = createEmptyInterval(Reg);
@@ -225,6 +226,15 @@ void LiveIntervals::computeRegMasks() {
RegMaskBits.push_back(Mask);
}
+ // Unwinders may clobber additional registers.
+ // FIXME: This functionality can possibly be merged into
+ // MachineBasicBlock::getBeginClobberMask().
+ if (MBB.isEHPad())
+ if (auto *Mask = TRI->getCustomEHPadPreservedMask(*MBB.getParent())) {
+ RegMaskSlots.push_back(Indexes->getMBBStartIdx(&MBB));
+ RegMaskBits.push_back(Mask);
+ }
+
for (const MachineInstr &MI : MBB) {
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isRegMask())
@@ -277,7 +287,7 @@ void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) {
bool IsRootReserved = true;
for (MCSuperRegIterator Super(*Root, TRI, /*IncludeSelf=*/true);
Super.isValid(); ++Super) {
- unsigned Reg = *Super;
+ MCRegister Reg = *Super;
if (!MRI->reg_empty(Reg))
LICalc->createDeadDefs(LR, Reg);
// A register unit is considered reserved if all its roots and all their
@@ -296,7 +306,7 @@ void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) {
for (MCRegUnitRootIterator Root(Unit, TRI); Root.isValid(); ++Root) {
for (MCSuperRegIterator Super(*Root, TRI, /*IncludeSelf=*/true);
Super.isValid(); ++Super) {
- unsigned Reg = *Super;
+ MCRegister Reg = *Super;
if (!MRI->reg_empty(Reg))
LICalc->extendToUses(LR, Reg);
}
@@ -362,7 +372,7 @@ static void createSegmentsForValues(LiveRange &LR,
void LiveIntervals::extendSegmentsToUses(LiveRange &Segments,
ShrinkToUsesWorkList &WorkList,
- unsigned Reg, LaneBitmask LaneMask) {
+ Register Reg, LaneBitmask LaneMask) {
// Keep track of the PHIs that are in use.
SmallPtrSet<VNInfo*, 8> UsedPHIs;
// Blocks that have already been added to WorkList as live-out.
@@ -444,13 +454,13 @@ void LiveIntervals::extendSegmentsToUses(LiveRange &Segments,
bool LiveIntervals::shrinkToUses(LiveInterval *li,
SmallVectorImpl<MachineInstr*> *dead) {
LLVM_DEBUG(dbgs() << "Shrink: " << *li << '\n');
- assert(Register::isVirtualRegister(li->reg) &&
+ assert(Register::isVirtualRegister(li->reg()) &&
"Can only shrink virtual registers");
// Shrink subregister live ranges.
bool NeedsCleanup = false;
for (LiveInterval::SubRange &S : li->subranges()) {
- shrinkToUses(S, li->reg);
+ shrinkToUses(S, li->reg());
if (S.empty())
NeedsCleanup = true;
}
@@ -460,8 +470,8 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
// Find all the values used, including PHI kills.
ShrinkToUsesWorkList WorkList;
- // Visit all instructions reading li->reg.
- unsigned Reg = li->reg;
+ // Visit all instructions reading li->reg().
+ Register Reg = li->reg();
for (MachineInstr &UseMI : MRI->reg_instructions(Reg)) {
if (UseMI.isDebugValue() || !UseMI.readsVirtualRegister(Reg))
continue;
@@ -514,7 +524,7 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI,
// Is the register live before? Otherwise we may have to add a read-undef
// flag for subregister defs.
- unsigned VReg = LI.reg;
+ Register VReg = LI.reg();
if (MRI->shouldTrackSubRegLiveness(VReg)) {
if ((I == LI.begin() || std::prev(I)->end < Def) && !VNI->isPHIDef()) {
MachineInstr *MI = getInstructionFromIndex(Def);
@@ -534,7 +544,7 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI,
// This is a dead def. Make sure the instruction knows.
MachineInstr *MI = getInstructionFromIndex(Def);
assert(MI && "No instruction defining live value");
- MI->addRegisterDead(LI.reg, TRI);
+ MI->addRegisterDead(LI.reg(), TRI);
if (HaveDeadDef)
MayHaveSplitComponents = true;
HaveDeadDef = true;
@@ -548,7 +558,7 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI,
return MayHaveSplitComponents;
}
-void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) {
+void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, Register Reg) {
LLVM_DEBUG(dbgs() << "Shrink: " << SR << '\n');
assert(Register::isVirtualRegister(Reg) &&
"Can only shrink virtual registers");
@@ -697,7 +707,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
LiveRange::const_iterator>, 4> SRs;
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = Register::index2VirtReg(i);
+ Register Reg = Register::index2VirtReg(i);
if (MRI->reg_nodbg_empty(Reg))
continue;
const LiveInterval &LI = getInterval(Reg);
@@ -868,14 +878,12 @@ float LiveIntervals::getSpillWeight(bool isDef, bool isUse,
float LiveIntervals::getSpillWeight(bool isDef, bool isUse,
const MachineBlockFrequencyInfo *MBFI,
const MachineBasicBlock *MBB) {
- BlockFrequency Freq = MBFI->getBlockFreq(MBB);
- const float Scale = 1.0f / MBFI->getEntryFreq();
- return (isDef + isUse) * (Freq.getFrequency() * Scale);
+ return (isDef + isUse) * MBFI->getBlockFreqRelativeToEntryBlock(MBB);
}
LiveRange::Segment
-LiveIntervals::addSegmentToEndOfBlock(unsigned reg, MachineInstr &startInst) {
- LiveInterval& Interval = createEmptyInterval(reg);
+LiveIntervals::addSegmentToEndOfBlock(Register Reg, MachineInstr &startInst) {
+ LiveInterval &Interval = createEmptyInterval(Reg);
VNInfo *VN = Interval.getNextValue(
SlotIndex(getInstructionIndex(startInst).getRegSlot()),
getVNInfoAllocator());
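// Editorial sketch (not part of the patch): the getSpillWeight change above
// folds the manual Freq * (1.0f / EntryFreq) scaling into one helper that
// returns the block frequency relative to the entry block. The arithmetic,
// spelled out with an illustrative RelFreq value:
float spillWeight(bool IsDef, bool IsUse, double RelFreq) {
  // A block running 4x as often as entry weighs a def+use pair at
  // (1 + 1) * 4.0 = 8.0.
  return static_cast<float>((IsDef + IsUse) * RelFreq);
}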
@@ -1030,7 +1038,8 @@ public:
// For physregs, only update the regunits that actually have a
// precomputed live range.
- for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
+ for (MCRegUnitIterator Units(Reg.asMCReg(), &TRI); Units.isValid();
+ ++Units)
if (LiveRange *LR = getRegUnitLI(*Units))
updateRange(*LR, *Units, LaneBitmask::getNone());
}
@@ -1041,7 +1050,7 @@ public:
private:
/// Update a single live range, assuming an instruction has been moved from
/// OldIdx to NewIdx.
- void updateRange(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask) {
+ void updateRange(LiveRange &LR, Register Reg, LaneBitmask LaneMask) {
if (!Updated.insert(&LR).second)
return;
LLVM_DEBUG({
@@ -1238,7 +1247,7 @@ private:
/// Update LR to reflect an instruction has been moved upwards from OldIdx
/// to NewIdx (NewIdx < OldIdx).
- void handleMoveUp(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask) {
+ void handleMoveUp(LiveRange &LR, Register Reg, LaneBitmask LaneMask) {
LiveRange::iterator E = LR.end();
// Segment going into OldIdx.
LiveRange::iterator OldIdxIn = LR.find(OldIdx.getBaseIndex());
@@ -1420,7 +1429,7 @@ private:
}
// Return the last use of reg between NewIdx and OldIdx.
- SlotIndex findLastUseBefore(SlotIndex Before, unsigned Reg,
+ SlotIndex findLastUseBefore(SlotIndex Before, Register Reg,
LaneBitmask LaneMask) {
if (Register::isVirtualRegister(Reg)) {
SlotIndex LastUse = Before;
@@ -1533,17 +1542,17 @@ void LiveIntervals::handleMoveIntoNewBundle(MachineInstr &BundleStart,
void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,
const MachineBasicBlock::iterator End,
- const SlotIndex endIdx,
- LiveRange &LR, const unsigned Reg,
+ const SlotIndex EndIdx, LiveRange &LR,
+ const Register Reg,
LaneBitmask LaneMask) {
- LiveInterval::iterator LII = LR.find(endIdx);
+ LiveInterval::iterator LII = LR.find(EndIdx);
SlotIndex lastUseIdx;
if (LII == LR.begin()) {
// This happens when the function is called for a subregister that only
// occurs _after_ the range that is to be repaired.
return;
}
- if (LII != LR.end() && LII->start < endIdx)
+ if (LII != LR.end() && LII->start < EndIdx)
lastUseIdx = LII->end;
else
--LII;
@@ -1637,11 +1646,11 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
while (End != MBB->end() && !Indexes->hasIndex(*End))
++End;
- SlotIndex endIdx;
+ SlotIndex EndIdx;
if (End == MBB->end())
- endIdx = getMBBEndIdx(MBB).getPrevSlot();
+ EndIdx = getMBBEndIdx(MBB).getPrevSlot();
else
- endIdx = getInstructionIndex(*End);
+ EndIdx = getInstructionIndex(*End);
Indexes->repairIndexesInRange(MBB, Begin, End);
@@ -1670,13 +1679,13 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
continue;
for (LiveInterval::SubRange &S : LI.subranges())
- repairOldRegInRange(Begin, End, endIdx, S, Reg, S.LaneMask);
+ repairOldRegInRange(Begin, End, EndIdx, S, Reg, S.LaneMask);
- repairOldRegInRange(Begin, End, endIdx, LI, Reg);
+ repairOldRegInRange(Begin, End, EndIdx, LI, Reg);
}
}
-void LiveIntervals::removePhysRegDefAt(unsigned Reg, SlotIndex Pos) {
+void LiveIntervals::removePhysRegDefAt(MCRegister Reg, SlotIndex Pos) {
for (MCRegUnitIterator Unit(Reg, TRI); Unit.isValid(); ++Unit) {
if (LiveRange *LR = getCachedRegUnit(*Unit))
if (VNInfo *VNI = LR->getVNInfoAt(Pos))
@@ -1709,7 +1718,7 @@ void LiveIntervals::splitSeparateComponents(LiveInterval &LI,
if (NumComp <= 1)
return;
LLVM_DEBUG(dbgs() << " Split " << NumComp << " components: " << LI << '\n');
- unsigned Reg = LI.reg;
+ Register Reg = LI.reg();
const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
for (unsigned I = 1; I < NumComp; ++I) {
Register NewVReg = MRI->createVirtualRegister(RegClass);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
index 9de77c19a23a..037cb5426235 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -12,7 +12,6 @@
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -188,7 +187,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
MachineInstr *DefMI = nullptr, *UseMI = nullptr;
// Check that there is a single def and a single use.
- for (MachineOperand &MO : MRI.reg_nodbg_operands(LI->reg)) {
+ for (MachineOperand &MO : MRI.reg_nodbg_operands(LI->reg())) {
MachineInstr *MI = MO.getParent();
if (MO.isDef()) {
if (DefMI && DefMI != MI)
@@ -224,7 +223,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
<< " into single use: " << *UseMI);
SmallVector<unsigned, 8> Ops;
- if (UseMI->readsWritesVirtualRegister(LI->reg, &Ops).second)
+ if (UseMI->readsWritesVirtualRegister(LI->reg(), &Ops).second)
return false;
MachineInstr *FoldMI = TII.foldMemoryOperand(*UseMI, Ops, *DefMI, &LIS);
@@ -236,7 +235,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
if (UseMI->shouldUpdateCallSiteInfo())
UseMI->getMF()->moveCallSiteInfo(UseMI, FoldMI);
UseMI->eraseFromParent();
- DefMI->addRegisterDead(LI->reg, nullptr);
+ DefMI->addRegisterDead(LI->reg(), nullptr);
Dead.push_back(DefMI);
++NumDCEFoldedLoads;
return true;
@@ -316,7 +315,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
if (Reg && MOI->readsReg() && !MRI.isReserved(Reg))
ReadsPhysRegs = true;
else if (MOI->isDef())
- LIS.removePhysRegDefAt(Reg, Idx);
+ LIS.removePhysRegDefAt(Reg.asMCReg(), Idx);
continue;
}
LiveInterval &LI = LIS.getInterval(Reg);
@@ -332,7 +331,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
// Remove defined value.
if (MOI->isDef()) {
if (TheDelegate && LI.getVNInfoAt(Idx) != nullptr)
- TheDelegate->LRE_WillShrinkVirtReg(LI.reg);
+ TheDelegate->LRE_WillShrinkVirtReg(LI.reg());
LIS.removeVRegDefAt(LI, Idx);
if (LI.empty())
RegsToErase.push_back(Reg);
@@ -369,7 +368,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
pop_back();
DeadRemats->insert(MI);
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
- MI->substituteRegister(Dest, NewLI.reg, 0, TRI);
+ MI->substituteRegister(Dest, NewLI.reg(), 0, TRI);
MI->getOperand(0).setIsDead(true);
} else {
if (TheDelegate)
@@ -409,7 +408,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead,
ToShrink.pop_back();
if (foldAsLoad(LI, Dead))
continue;
- unsigned VReg = LI->reg;
+ unsigned VReg = LI->reg();
if (TheDelegate)
TheDelegate->LRE_WillShrinkVirtReg(VReg);
if (!LIS.shrinkToUses(LI, &Dead))
@@ -436,15 +435,15 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead,
if (!SplitLIs.empty())
++NumFracRanges;
- unsigned Original = VRM ? VRM->getOriginal(VReg) : 0;
+ Register Original = VRM ? VRM->getOriginal(VReg) : Register();
for (const LiveInterval *SplitLI : SplitLIs) {
// If LI is an original interval that hasn't been split yet, make the new
// intervals their own originals instead of referring to LI. The original
// interval must contain all the split products, and LI doesn't.
if (Original != VReg && Original != 0)
- VRM->setIsSplitFromReg(SplitLI->reg, Original);
+ VRM->setIsSplitFromReg(SplitLI->reg(), Original);
if (TheDelegate)
- TheDelegate->LRE_DidCloneVirtReg(SplitLI->reg, VReg);
+ TheDelegate->LRE_DidCloneVirtReg(SplitLI->reg(), VReg);
}
}
}
@@ -463,14 +462,14 @@ void
LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF,
const MachineLoopInfo &Loops,
const MachineBlockFrequencyInfo &MBFI) {
- VirtRegAuxInfo VRAI(MF, LIS, VRM, Loops, MBFI);
+ VirtRegAuxInfo VRAI(MF, LIS, *VRM, Loops, MBFI);
for (unsigned I = 0, Size = size(); I < Size; ++I) {
LiveInterval &LI = LIS.getInterval(get(I));
- if (MRI.recomputeRegClass(LI.reg))
+ if (MRI.recomputeRegClass(LI.reg()))
LLVM_DEBUG({
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- dbgs() << "Inflated " << printReg(LI.reg) << " to "
- << TRI->getRegClassName(MRI.getRegClass(LI.reg)) << '\n';
+ dbgs() << "Inflated " << printReg(LI.reg()) << " to "
+ << TRI->getRegClassName(MRI.getRegClass(LI.reg())) << '\n';
});
VRAI.calculateSpillWeightAndHint(LI);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp
index 08f046420fa1..a69aa6557e46 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp
@@ -78,7 +78,7 @@ void LiveRegMatrix::releaseMemory() {
template <typename Callable>
static bool foreachUnit(const TargetRegisterInfo *TRI,
- LiveInterval &VRegInterval, unsigned PhysReg,
+ LiveInterval &VRegInterval, MCRegister PhysReg,
Callable Func) {
if (VRegInterval.hasSubRanges()) {
for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
@@ -101,11 +101,11 @@ static bool foreachUnit(const TargetRegisterInfo *TRI,
return false;
}
-void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) {
- LLVM_DEBUG(dbgs() << "assigning " << printReg(VirtReg.reg, TRI) << " to "
+void LiveRegMatrix::assign(LiveInterval &VirtReg, MCRegister PhysReg) {
+ LLVM_DEBUG(dbgs() << "assigning " << printReg(VirtReg.reg(), TRI) << " to "
<< printReg(PhysReg, TRI) << ':');
- assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment");
- VRM->assignVirt2Phys(VirtReg.reg, PhysReg);
+ assert(!VRM->hasPhys(VirtReg.reg()) && "Duplicate VirtReg assignment");
+ VRM->assignVirt2Phys(VirtReg.reg(), PhysReg);
foreachUnit(
TRI, VirtReg, PhysReg, [&](unsigned Unit, const LiveRange &Range) {
@@ -119,10 +119,10 @@ void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) {
}
void LiveRegMatrix::unassign(LiveInterval &VirtReg) {
- Register PhysReg = VRM->getPhys(VirtReg.reg);
- LLVM_DEBUG(dbgs() << "unassigning " << printReg(VirtReg.reg, TRI) << " from "
- << printReg(PhysReg, TRI) << ':');
- VRM->clearVirt(VirtReg.reg);
+ Register PhysReg = VRM->getPhys(VirtReg.reg());
+ LLVM_DEBUG(dbgs() << "unassigning " << printReg(VirtReg.reg(), TRI)
+ << " from " << printReg(PhysReg, TRI) << ':');
+ VRM->clearVirt(VirtReg.reg());
foreachUnit(TRI, VirtReg, PhysReg,
[&](unsigned Unit, const LiveRange &Range) {
@@ -135,7 +135,7 @@ void LiveRegMatrix::unassign(LiveInterval &VirtReg) {
LLVM_DEBUG(dbgs() << '\n');
}
-bool LiveRegMatrix::isPhysRegUsed(unsigned PhysReg) const {
+bool LiveRegMatrix::isPhysRegUsed(MCRegister PhysReg) const {
for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid(); ++Unit) {
if (!Matrix[*Unit].empty())
return true;
@@ -144,12 +144,12 @@ bool LiveRegMatrix::isPhysRegUsed(unsigned PhysReg) const {
}
bool LiveRegMatrix::checkRegMaskInterference(LiveInterval &VirtReg,
- unsigned PhysReg) {
+ MCRegister PhysReg) {
// Check if the cached information is valid.
// The same BitVector can be reused for all PhysRegs.
// We could cache multiple VirtRegs if it becomes necessary.
- if (RegMaskVirtReg != VirtReg.reg || RegMaskTag != UserTag) {
- RegMaskVirtReg = VirtReg.reg;
+ if (RegMaskVirtReg != VirtReg.reg() || RegMaskTag != UserTag) {
+ RegMaskVirtReg = VirtReg.reg();
RegMaskTag = UserTag;
RegMaskUsable.clear();
LIS->checkRegMaskInterference(VirtReg, RegMaskUsable);
@@ -162,10 +162,10 @@ bool LiveRegMatrix::checkRegMaskInterference(LiveInterval &VirtReg,
}
bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg,
- unsigned PhysReg) {
+ MCRegister PhysReg) {
if (VirtReg.empty())
return false;
- CoalescerPair CP(VirtReg.reg, PhysReg, *TRI);
+ CoalescerPair CP(VirtReg.reg(), PhysReg, *TRI);
bool Result = foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit,
const LiveRange &Range) {
@@ -176,14 +176,14 @@ bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg,
}
LiveIntervalUnion::Query &LiveRegMatrix::query(const LiveRange &LR,
- unsigned RegUnit) {
+ MCRegister RegUnit) {
LiveIntervalUnion::Query &Q = Queries[RegUnit];
Q.init(UserTag, LR, Matrix[RegUnit]);
return Q;
}
LiveRegMatrix::InterferenceKind
-LiveRegMatrix::checkInterference(LiveInterval &VirtReg, unsigned PhysReg) {
+LiveRegMatrix::checkInterference(LiveInterval &VirtReg, MCRegister PhysReg) {
if (VirtReg.empty())
return IK_Free;
@@ -197,9 +197,9 @@ LiveRegMatrix::checkInterference(LiveInterval &VirtReg, unsigned PhysReg) {
// Check the matrix for virtual register interference.
bool Interference = foreachUnit(TRI, VirtReg, PhysReg,
- [&](unsigned Unit, const LiveRange &LR) {
- return query(LR, Unit).checkInterference();
- });
+ [&](MCRegister Unit, const LiveRange &LR) {
+ return query(LR, Unit).checkInterference();
+ });
if (Interference)
return IK_VirtReg;
@@ -207,7 +207,7 @@ LiveRegMatrix::checkInterference(LiveInterval &VirtReg, unsigned PhysReg) {
}
bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End,
- unsigned PhysReg) {
+ MCRegister PhysReg) {
// Construct artificial live range containing only one segment [Start, End).
VNInfo valno(0, Start);
LiveRange::Segment Seg(Start, End, &valno);
@@ -221,3 +221,13 @@ bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End,
}
return false;
}
+
+Register LiveRegMatrix::getOneVReg(unsigned PhysReg) const {
+ LiveInterval *VRegInterval = nullptr;
+ for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid(); ++Unit) {
+ if ((VRegInterval = Matrix[*Unit].getOneVReg()))
+ return VRegInterval->reg();
+ }
+
+ return MCRegister::NoRegister;
+}
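// Editorial sketch (not part of the patch): LiveRegMatrix::getOneVReg walks
// PhysReg's register units and returns the first virtual register found live
// in any of them, else NoRegister. The same scan over plain containers, with
// 0 standing in for MCRegister::NoRegister:
#include <vector>

unsigned firstAssignedVReg(const std::vector<unsigned> &UnitToVReg) {
  for (unsigned VReg : UnitToVReg)
    if (VReg != 0)
      return VReg;
  return 0;
}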
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRegUnits.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRegUnits.cpp
index b2731aa0e7db..ea2075bc139d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRegUnits.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRegUnits.cpp
@@ -11,15 +11,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LiveRegUnits.h"
-
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
index 6610491dd111..49b880c30936 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
@@ -82,17 +82,15 @@ LLVM_DUMP_METHOD void LiveVariables::VarInfo::dump() const {
#endif
/// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg.
-LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) {
- assert(Register::isVirtualRegister(RegIdx) &&
- "getVarInfo: not a virtual register!");
- VirtRegInfo.grow(RegIdx);
- return VirtRegInfo[RegIdx];
+LiveVariables::VarInfo &LiveVariables::getVarInfo(Register Reg) {
+ assert(Reg.isVirtual() && "getVarInfo: not a virtual register!");
+ VirtRegInfo.grow(Reg);
+ return VirtRegInfo[Reg];
}
-void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo,
- MachineBasicBlock *DefBlock,
- MachineBasicBlock *MBB,
- std::vector<MachineBasicBlock*> &WorkList) {
+void LiveVariables::MarkVirtRegAliveInBlock(
+ VarInfo &VRInfo, MachineBasicBlock *DefBlock, MachineBasicBlock *MBB,
+ SmallVectorImpl<MachineBasicBlock *> &WorkList) {
unsigned BBNum = MBB->getNumber();
// Check to see if this basic block is one of the killing blocks. If so,
@@ -118,7 +116,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo,
void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo,
MachineBasicBlock *DefBlock,
MachineBasicBlock *MBB) {
- std::vector<MachineBasicBlock*> WorkList;
+ SmallVector<MachineBasicBlock *, 16> WorkList;
MarkVirtRegAliveInBlock(VRInfo, DefBlock, MBB, WorkList);
while (!WorkList.empty()) {
@@ -128,13 +126,13 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo,
}
}
-void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB,
+void LiveVariables::HandleVirtRegUse(Register Reg, MachineBasicBlock *MBB,
MachineInstr &MI) {
- assert(MRI->getVRegDef(reg) && "Register use before def!");
+ assert(MRI->getVRegDef(Reg) && "Register use before def!");
unsigned BBNum = MBB->getNumber();
- VarInfo& VRInfo = getVarInfo(reg);
+ VarInfo &VRInfo = getVarInfo(Reg);
// Check to see if this basic block is already a kill block.
if (!VRInfo.Kills.empty() && VRInfo.Kills.back()->getParent() == MBB) {
@@ -165,7 +163,8 @@ void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB,
// where there is a use in a PHI node that's a predecessor to the defining
// block. We don't want to mark all predecessors as having the value "alive"
// in this case.
- if (MBB == MRI->getVRegDef(reg)->getParent()) return;
+ if (MBB == MRI->getVRegDef(Reg)->getParent())
+ return;
// Add a new kill entry for this basic block. If this virtual register is
// already marked as alive in this basic block, that means it is alive in at
@@ -176,10 +175,10 @@ void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB,
// Update all dominating blocks to mark them as "known live".
for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
E = MBB->pred_end(); PI != E; ++PI)
- MarkVirtRegAliveInBlock(VRInfo, MRI->getVRegDef(reg)->getParent(), *PI);
+ MarkVirtRegAliveInBlock(VRInfo, MRI->getVRegDef(Reg)->getParent(), *PI);
}
-void LiveVariables::HandleVirtRegDef(unsigned Reg, MachineInstr &MI) {
+void LiveVariables::HandleVirtRegDef(Register Reg, MachineInstr &MI) {
VarInfo &VRInfo = getVarInfo(Reg);
if (VRInfo.AliveBlocks.empty())
@@ -189,8 +188,9 @@ void LiveVariables::HandleVirtRegDef(unsigned Reg, MachineInstr &MI) {
/// FindLastPartialDef - Return the last partial def of the specified register.
/// Also returns the sub-registers that're defined by the instruction.
-MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
- SmallSet<unsigned,4> &PartDefRegs) {
+MachineInstr *
+LiveVariables::FindLastPartialDef(Register Reg,
+ SmallSet<unsigned, 4> &PartDefRegs) {
unsigned LastDefReg = 0;
unsigned LastDefDist = 0;
MachineInstr *LastDef = nullptr;
@@ -228,7 +228,7 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
/// HandlePhysRegUse - Turn previous partial def's into read/mod/writes. Add
/// implicit defs to a machine instruction if there was an earlier def of its
/// super-register.
-void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr &MI) {
+void LiveVariables::HandlePhysRegUse(Register Reg, MachineInstr &MI) {
MachineInstr *LastDef = PhysRegDef[Reg];
// If there was a previous use or a "full" def all is well.
if (!LastDef && !PhysRegUse[Reg]) {
@@ -278,7 +278,7 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr &MI) {
/// FindLastRefOrPartRef - Return the last reference or partial reference of
/// the specified register.
-MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) {
+MachineInstr *LiveVariables::FindLastRefOrPartRef(Register Reg) {
MachineInstr *LastDef = PhysRegDef[Reg];
MachineInstr *LastUse = PhysRegUse[Reg];
if (!LastDef && !LastUse)
@@ -308,7 +308,7 @@ MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) {
return LastRefOrPartRef;
}
-bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
+bool LiveVariables::HandlePhysRegKill(Register Reg, MachineInstr *MI) {
MachineInstr *LastDef = PhysRegDef[Reg];
MachineInstr *LastUse = PhysRegUse[Reg];
if (!LastDef && !LastUse)
@@ -440,7 +440,7 @@ void LiveVariables::HandleRegMask(const MachineOperand &MO) {
}
}
-void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
+void LiveVariables::HandlePhysRegDef(Register Reg, MachineInstr *MI,
SmallVectorImpl<unsigned> &Defs) {
// What parts of the register are previously defined?
SmallSet<unsigned, 32> Live;
@@ -486,7 +486,7 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
void LiveVariables::UpdatePhysRegDefs(MachineInstr &MI,
SmallVectorImpl<unsigned> &Defs) {
while (!Defs.empty()) {
- unsigned Reg = Defs.back();
+ Register Reg = Defs.back();
Defs.pop_back();
for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs) {
@@ -653,7 +653,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
// Convert and transfer the dead / killed information we have gathered into
// VirtRegInfo onto MI's.
for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i) {
- const unsigned Reg = Register::index2VirtReg(i);
+ const Register Reg = Register::index2VirtReg(i);
for (unsigned j = 0, e2 = VirtRegInfo[Reg].Kills.size(); j != e2; ++j)
if (VirtRegInfo[Reg].Kills[j] == MRI->getVRegDef(Reg))
VirtRegInfo[Reg].Kills[j]->addRegisterDead(Reg, TRI);
@@ -666,7 +666,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
// other part of the code generator if this happens.
#ifndef NDEBUG
for(MachineFunction::iterator i = MF->begin(), e = MF->end(); i != e; ++i)
- assert(Visited.count(&*i) != 0 && "unreachable basic block found");
+ assert(Visited.contains(&*i) && "unreachable basic block found");
#endif
PhysRegDef.clear();
@@ -678,7 +678,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
/// replaceKillInstruction - Update register kill info by replacing a kill
/// instruction with a new one.
-void LiveVariables::replaceKillInstruction(unsigned Reg, MachineInstr &OldMI,
+void LiveVariables::replaceKillInstruction(Register Reg, MachineInstr &OldMI,
MachineInstr &NewMI) {
VarInfo &VI = getVarInfo(Reg);
std::replace(VI.Kills.begin(), VI.Kills.end(), &OldMI, &NewMI);
@@ -718,8 +718,7 @@ void LiveVariables::analyzePHINodes(const MachineFunction& Fn) {
}
bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB,
- unsigned Reg,
- MachineRegisterInfo &MRI) {
+ Register Reg, MachineRegisterInfo &MRI) {
unsigned Num = MBB.getNumber();
// Reg is live-through.
@@ -735,7 +734,7 @@ bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB,
return findKill(&MBB);
}
-bool LiveVariables::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB) {
+bool LiveVariables::isLiveOut(Register Reg, const MachineBasicBlock &MBB) {
LiveVariables::VarInfo &VI = getVarInfo(Reg);
SmallPtrSet<const MachineBasicBlock *, 8> Kills;
@@ -793,7 +792,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB,
// Update info for all live variables
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = Register::index2VirtReg(i);
+ Register Reg = Register::index2VirtReg(i);
// If the Defs is defined in the successor it can't be live in BB.
if (Defs.count(Reg))
@@ -819,7 +818,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB,
SparseBitVector<> &BV = LiveInSets[SuccBB->getNumber()];
for (auto R = BV.begin(), E = BV.end(); R != E; R++) {
- unsigned VirtReg = Register::index2VirtReg(*R);
+ Register VirtReg = Register::index2VirtReg(*R);
LiveVariables::VarInfo &VI = getVarInfo(VirtReg);
VI.AliveBlocks.set(NumNew);
}
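The LiveVariables hunks above are mechanical but show two idioms that
recur throughout this import: bare unsigned register parameters become
the type-safe Register wrapper, and the heap-backed std::vector worklist
becomes a SmallVector that usually stays on the stack. A minimal sketch
of the combined pattern (the helper itself is hypothetical, not part of
the patch):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"
    #include "llvm/CodeGen/Register.h"

    using namespace llvm;

    // Visit every used virtual register of a function. Register keeps
    // the virtual/physical distinction that a bare unsigned loses.
    static void forEachVirtReg(const MachineRegisterInfo &MRI) {
      SmallVector<Register, 16> Worklist; // 16 inline slots, no heap churn
      for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
        Register Reg = Register::index2VirtReg(I);
        if (!MRI.reg_empty(Reg))
          Worklist.push_back(Reg);
      }
      while (!Worklist.empty()) {
        Register Reg = Worklist.pop_back_val();
        (void)Reg; // process Reg here
      }
    }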
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index 204fb556d810..ec6e693e8a46 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -117,7 +117,7 @@ bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
// If the target doesn't want/need this pass, or if there are no locals
// to consider, early exit.
- if (!TRI->requiresVirtualBaseRegisters(MF) || LocalObjectCount == 0)
+ if (LocalObjectCount == 0 || !TRI->requiresVirtualBaseRegisters(MF))
return true;
// Make sure we have enough space to store the local offsets.
@@ -416,15 +416,16 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF);
BaseReg = Fn.getRegInfo().createVirtualRegister(RC);
- LLVM_DEBUG(dbgs() << " Materializing base register " << BaseReg
+ LLVM_DEBUG(dbgs() << " Materializing base register"
<< " at frame local offset "
- << LocalOffset + InstrOffset << "\n");
+ << LocalOffset + InstrOffset);
// Tell the target to insert the instruction to initialize
// the base register.
// MachineBasicBlock::iterator InsertionPt = Entry->begin();
- TRI->materializeFrameBaseRegister(Entry, BaseReg, FrameIdx,
- InstrOffset);
+ BaseReg = TRI->materializeFrameBaseRegister(Entry, FrameIdx, InstrOffset);
+
+ LLVM_DEBUG(dbgs() << " into " << printReg(BaseReg, TRI) << '\n');
// The base register already includes any offset specified
// by the instruction, so account for that so it doesn't get
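The substantive change in this file is the new shape of
materializeFrameBaseRegister: instead of being handed a pre-created
virtual register, the target hook now creates the base register itself
and returns it, which lets each target pick the register class. The new
call pattern, as a sketch with the pass's surrounding state assumed:

    // The target chooses the class and returns the materialized register.
    Register BaseReg =
        TRI->materializeFrameBaseRegister(Entry, FrameIdx, InstrOffset);
    LLVM_DEBUG(dbgs() << "  into " << printReg(BaseReg, TRI) << '\n');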
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp
index 33752a1f9230..2bda586db8c7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/raw_ostream.h"
@@ -58,3 +59,18 @@ LLT llvm::getLLTForMVT(MVT Ty) {
return LLT::vector(Ty.getVectorNumElements(),
Ty.getVectorElementType().getSizeInBits());
}
+
+const llvm::fltSemantics &llvm::getFltSemanticForLLT(LLT Ty) {
+ assert(Ty.isScalar() && "Expected a scalar type.");
+ switch (Ty.getSizeInBits()) {
+ case 16:
+ return APFloat::IEEEhalf();
+ case 32:
+ return APFloat::IEEEsingle();
+ case 64:
+ return APFloat::IEEEdouble();
+ case 128:
+ return APFloat::IEEEquad();
+ }
+ llvm_unreachable("Invalid FP type size.");
+}
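getFltSemanticForLLT gives GlobalISel one place to map a scalar LLT to
its IEEE semantics. Note that it asserts on non-scalar types and treats
any width other than 16/32/64/128 as unreachable, so callers must
pre-filter. A hedged usage sketch, e.g. when materializing a
floating-point constant of a given width:

    #include "llvm/ADT/APFloat.h"
    #include "llvm/CodeGen/LowLevelType.h"

    using namespace llvm;

    // Build +0.0 with semantics matching the scalar LLT: s16 -> IEEEhalf,
    // s32 -> IEEEsingle, s64 -> IEEEdouble, s128 -> IEEEquad.
    static APFloat zeroForLLT(LLT Ty) {
      return APFloat::getZero(getFltSemanticForLLT(Ty));
    }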
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp
index 36b863178b47..a06d1d6255c7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp
@@ -15,8 +15,8 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
@@ -44,6 +44,7 @@ private:
GlobalVariable *to) {
to->setLinkage(from->getLinkage());
to->setVisibility(from->getVisibility());
+ to->setDSOLocal(from->isDSOLocal());
if (from->hasComdat()) {
to->setComdat(M.getOrInsertComdat(to->getName()));
to->getComdat()->setSelectionKind(from->getComdat()->getSelectionKind());
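The functional change here is one line: the emutls control variable now
inherits dso_local from the original TLS global, alongside linkage,
visibility and comdat, so dso_local code keeps direct (non-GOT) access.
The propagation pattern, extracted as a self-contained sketch:

    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Mirror the source global's linkage-related attributes onto its
    // replacement, including the newly-propagated DSO-locality bit.
    static void copyLinkageAttributes(Module &M, const GlobalVariable *From,
                                      GlobalVariable *To) {
      To->setLinkage(From->getLinkage());
      To->setVisibility(From->getVisibility());
      To->setDSOLocal(From->isDSOLocal());
      if (From->hasComdat()) {
        To->setComdat(M.getOrInsertComdat(To->getName()));
        To->getComdat()->setSelectionKind(From->getComdat()->getSelectionKind());
      }
    }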
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp
index 5110f75ebb42..4755defec793 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MBFIWrapper.cpp
@@ -30,6 +30,18 @@ void MBFIWrapper::setBlockFreq(const MachineBasicBlock *MBB,
MergedBBFreq[MBB] = F;
}
+Optional<uint64_t>
+MBFIWrapper::getBlockProfileCount(const MachineBasicBlock *MBB) const {
+ auto I = MergedBBFreq.find(MBB);
+
+ // Modified block frequency also impacts profile count. So we should compute
+ // profile count from the new block frequency if it has been changed.
+ if (I != MergedBBFreq.end())
+ return MBFI.getProfileCountFromFreq(I->second.getFrequency());
+
+ return MBFI.getBlockProfileCount(MBB);
+}
+
raw_ostream & MBFIWrapper::printBlockFreq(raw_ostream &OS,
const MachineBasicBlock *MBB) const {
return MBFI.printBlockFreq(OS, getBlockFreq(MBB));
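MBFIWrapper overlays locally modified frequencies on top of the
underlying MachineBlockFrequencyInfo, and the new getBlockProfileCount
keeps counts consistent with that overlay by converting the cached
frequency rather than querying the analysis directly. The result is an
Optional, empty when the function has no profile; a hypothetical caller:

    #include "llvm/ADT/Optional.h"
    #include "llvm/CodeGen/MBFIWrapper.h"

    using namespace llvm;

    // Fold the missing-profile case to zero for threshold comparisons.
    static uint64_t countOrZero(const MBFIWrapper &MBFI,
                                const MachineBasicBlock *MBB) {
      if (Optional<uint64_t> C = MBFI.getBlockProfileCount(MBB))
        return *C;
      return 0; // no profile data attached to this function
    }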
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
index 9eddb8626f60..8ef6aca602a1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -85,9 +85,7 @@ static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) {
return {};
ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
std::vector<MachineBasicBlock *> RPOList;
- for (auto MBB : RPOT) {
- RPOList.push_back(MBB);
- }
+ append_range(RPOList, RPOT);
return RPOList;
}
@@ -108,7 +106,7 @@ rescheduleLexographically(std::vector<MachineInstr *> instructions,
OS.flush();
// Trim the assignment, or start from the beginning in the case of a store.
- const size_t i = S.find("=");
+ const size_t i = S.find('=');
StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
}
@@ -198,8 +196,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
if (II->getOperand(i).isReg()) {
if (!Register::isVirtualRegister(II->getOperand(i).getReg()))
- if (llvm::find(PhysRegDefs, II->getOperand(i).getReg()) ==
- PhysRegDefs.end()) {
+ if (!llvm::is_contained(PhysRegDefs, II->getOperand(i).getReg())) {
continue;
}
}
@@ -276,9 +273,9 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
// Sort the defs for users of multiple defs lexographically.
for (const auto &E : MultiUserLookup) {
- auto UseI =
- std::find_if(MBB->instr_begin(), MBB->instr_end(),
- [&](MachineInstr &MI) -> bool { return &MI == E.second; });
+ auto UseI = llvm::find_if(MBB->instrs(), [&](MachineInstr &MI) -> bool {
+ return &MI == E.second;
+ });
if (UseI == MBB->instr_end())
continue;
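Three loops in this file collapse into STLExtras helpers: append_range
for the push_back loop, is_contained for the find-versus-end test, and
the range form of find_if. All are behavior-preserving; a side-by-side
sketch:

    #include "llvm/ADT/STLExtras.h"
    #include <vector>

    static void demo(std::vector<int> &Out, const std::vector<int> &In) {
      // Was: for (int V : In) Out.push_back(V);
      llvm::append_range(Out, In);
      // Was: llvm::find(Out, 42) != Out.end()
      bool HasAnswer = llvm::is_contained(Out, 42);
      // Was: std::find_if(Out.begin(), Out.end(), ...)
      auto It = llvm::find_if(Out, [](int V) { return V < 0; });
      (void)HasAnswer;
      (void)It;
    }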
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index 98af46dc4872..b86fd6b41318 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -212,11 +212,12 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("contract", MIToken::kw_contract)
.Case("afn", MIToken::kw_afn)
.Case("reassoc", MIToken::kw_reassoc)
- .Case("nuw" , MIToken::kw_nuw)
- .Case("nsw" , MIToken::kw_nsw)
- .Case("exact" , MIToken::kw_exact)
+ .Case("nuw", MIToken::kw_nuw)
+ .Case("nsw", MIToken::kw_nsw)
+ .Case("exact", MIToken::kw_exact)
.Case("nofpexcept", MIToken::kw_nofpexcept)
.Case("debug-location", MIToken::kw_debug_location)
+ .Case("debug-instr-number", MIToken::kw_debug_instr_number)
.Case("same_value", MIToken::kw_cfi_same_value)
.Case("offset", MIToken::kw_cfi_offset)
.Case("rel_offset", MIToken::kw_cfi_rel_offset)
@@ -231,7 +232,8 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("undefined", MIToken::kw_cfi_undefined)
.Case("register", MIToken::kw_cfi_register)
.Case("window_save", MIToken::kw_cfi_window_save)
- .Case("negate_ra_sign_state", MIToken::kw_cfi_aarch64_negate_ra_sign_state)
+ .Case("negate_ra_sign_state",
+ MIToken::kw_cfi_aarch64_negate_ra_sign_state)
.Case("blockaddress", MIToken::kw_blockaddress)
.Case("intrinsic", MIToken::kw_intrinsic)
.Case("target-index", MIToken::kw_target_index)
@@ -247,6 +249,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("dereferenceable", MIToken::kw_dereferenceable)
.Case("invariant", MIToken::kw_invariant)
.Case("align", MIToken::kw_align)
+ .Case("basealign", MIToken::kw_align)
.Case("addrspace", MIToken::kw_addrspace)
.Case("stack", MIToken::kw_stack)
.Case("got", MIToken::kw_got)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h
index ef16da94d21b..452eda721331 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -74,6 +74,7 @@ struct MIToken {
kw_exact,
kw_nofpexcept,
kw_debug_location,
+ kw_debug_instr_number,
kw_cfi_same_value,
kw_cfi_offset,
kw_cfi_rel_offset,
@@ -103,6 +104,7 @@ struct MIToken {
kw_non_temporal,
kw_invariant,
kw_align,
+ kw_basealign,
kw_addrspace,
kw_stack,
kw_got,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index ded31cd08fb5..fe979b981886 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -369,10 +369,7 @@ static void initSlots2Values(const Function &F,
const Value* PerFunctionMIParsingState::getIRValue(unsigned Slot) {
if (Slots2Values.empty())
initSlots2Values(MF.getFunction(), Slots2Values);
- auto ValueInfo = Slots2Values.find(Slot);
- if (ValueInfo == Slots2Values.end())
- return nullptr;
- return ValueInfo->second;
+ return Slots2Values.lookup(Slot);
}
namespace {
@@ -984,6 +981,7 @@ bool MIParser::parse(MachineInstr *&MI) {
Token.isNot(MIToken::kw_post_instr_symbol) &&
Token.isNot(MIToken::kw_heap_alloc_marker) &&
Token.isNot(MIToken::kw_debug_location) &&
+ Token.isNot(MIToken::kw_debug_instr_number) &&
Token.isNot(MIToken::coloncolon) && Token.isNot(MIToken::lbrace)) {
auto Loc = Token.location();
Optional<unsigned> TiedDefIdx;
@@ -1014,6 +1012,19 @@ bool MIParser::parse(MachineInstr *&MI) {
if (parseHeapAllocMarker(HeapAllocMarker))
return true;
+ unsigned InstrNum = 0;
+ if (Token.is(MIToken::kw_debug_instr_number)) {
+ lex();
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error("expected an integer literal after 'debug-instr-number'");
+ if (getUnsigned(InstrNum))
+ return true;
+ lex();
+ // Lex past trailing comma if present.
+ if (Token.is(MIToken::comma))
+ lex();
+ }
+
DebugLoc DebugLocation;
if (Token.is(MIToken::kw_debug_location)) {
lex();
@@ -1070,6 +1081,8 @@ bool MIParser::parse(MachineInstr *&MI) {
MI->setHeapAllocMarker(MF, HeapAllocMarker);
if (!MemOperands.empty())
MI->setMemRefs(MF, MemOperands);
+ if (InstrNum)
+ MI->setDebugInstrNum(InstrNum);
return false;
}
@@ -2713,7 +2726,7 @@ bool MIParser::parseOffset(int64_t &Offset) {
}
bool MIParser::parseAlignment(unsigned &Alignment) {
- assert(Token.is(MIToken::kw_align));
+ assert(Token.is(MIToken::kw_align) || Token.is(MIToken::kw_basealign));
lex();
if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned())
return error("expected an integer literal after 'align'");
@@ -3061,6 +3074,12 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
while (consumeIfPresent(MIToken::comma)) {
switch (Token.kind()) {
case MIToken::kw_align:
+ // align is printed if it is different than size.
+ if (parseAlignment(BaseAlignment))
+ return true;
+ break;
+ case MIToken::kw_basealign:
+ // basealign is printed if it is different than align.
if (parseAlignment(BaseAlignment))
return true;
break;
@@ -3153,10 +3172,7 @@ static void initSlots2BasicBlocks(
static const BasicBlock *getIRBlockFromSlot(
unsigned Slot,
const DenseMap<unsigned, const BasicBlock *> &Slots2BasicBlocks) {
- auto BlockInfo = Slots2BasicBlocks.find(Slot);
- if (BlockInfo == Slots2BasicBlocks.end())
- return nullptr;
- return BlockInfo->second;
+ return Slots2BasicBlocks.lookup(Slot);
}
const BasicBlock *MIParser::getIRBlock(unsigned Slot) {
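Both map lookups in this file switch to DenseMap::lookup, which returns
a value-initialized result -- nullptr for pointer mapped types -- when
the key is absent, exactly what the removed find/compare/dereference
sequence computed:

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/IR/Value.h"

    using namespace llvm;

    static const Value *getValue(const DenseMap<unsigned, const Value *> &M,
                                 unsigned Slot) {
      // Was: auto I = M.find(Slot);
      //      return I == M.end() ? nullptr : I->second;
      return M.lookup(Slot); // nullptr when Slot has no entry
    }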
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index 2e0b0e745e9e..ffa9aeb21edb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -161,6 +161,9 @@ private:
SMRange SourceRange);
void computeFunctionProperties(MachineFunction &MF);
+
+ void setupDebugValueTracking(MachineFunction &MF,
+ PerFunctionMIParsingState &PFS, const yaml::MachineFunction &YamlMF);
};
} // end namespace llvm
@@ -322,9 +325,14 @@ bool MIRParserImpl::parseMachineFunction(Module &M, MachineModuleInfo &MMI) {
static bool isSSA(const MachineFunction &MF) {
const MachineRegisterInfo &MRI = MF.getRegInfo();
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
- unsigned Reg = Register::index2VirtReg(I);
+ Register Reg = Register::index2VirtReg(I);
if (!MRI.hasOneDef(Reg) && !MRI.def_empty(Reg))
return false;
+
+ // Subregister defs are invalid in SSA.
+ const MachineOperand *RegDef = MRI.getOneDef(Reg);
+ if (RegDef && RegDef->getSubReg() != 0)
+ return false;
}
return true;
}
@@ -397,6 +405,23 @@ bool MIRParserImpl::initializeCallSiteInfo(
return false;
}
+void MIRParserImpl::setupDebugValueTracking(
+ MachineFunction &MF, PerFunctionMIParsingState &PFS,
+ const yaml::MachineFunction &YamlMF) {
+ // Compute the value of the "next instruction number" field.
+ unsigned MaxInstrNum = 0;
+ for (auto &MBB : MF)
+ for (auto &MI : MBB)
+ MaxInstrNum = std::max((unsigned)MI.peekDebugInstrNum(), MaxInstrNum);
+ MF.setDebugInstrNumberingCount(MaxInstrNum);
+
+ // Load any substitutions.
+ for (auto &Sub : YamlMF.DebugValueSubstitutions) {
+ MF.makeDebugValueSubstitution(std::make_pair(Sub.SrcInst, Sub.SrcOp),
+ std::make_pair(Sub.DstInst, Sub.DstOp));
+ }
+}
+
bool
MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
MachineFunction &MF) {
@@ -446,10 +471,8 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
}
// Check Basic Block Section Flags.
if (MF.getTarget().getBBSectionsType() == BasicBlockSection::Labels) {
- MF.createBBLabels();
MF.setBBSectionsType(BasicBlockSection::Labels);
} else if (MF.hasBBSections()) {
- MF.createBBLabels();
MF.assignBeginEndSections();
}
PFS.SM = &SM;
@@ -507,6 +530,8 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
if (initializeCallSiteInfo(PFS, YamlMF))
return false;
+ setupDebugValueTracking(MF, PFS, YamlMF);
+
MF.getSubtarget().mirFileLoaded(MF);
MF.verify();
@@ -634,6 +659,12 @@ bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS,
// Compute MachineRegisterInfo::UsedPhysRegMask
for (const MachineBasicBlock &MBB : MF) {
+ // Make sure MRI knows about registers clobbered by unwinder.
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (MBB.isEHPad())
+ if (auto *RegMask = TRI->getCustomEHPadPreservedMask(MF))
+ MRI.addPhysRegsUsedFromRegMask(RegMask);
+
for (const MachineInstr &MI : MBB) {
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isRegMask())
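Besides the SSA check for subregister defs, this file gains
setupDebugValueTracking, which seeds the function's instruction-number
counter past the largest debug-instr-number already present in the
parsed MIR -- otherwise freshly numbered instructions could collide --
and then loads the value substitution table. A sketch of how later code
reads the numbers back (method names as used by the patch):

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/CodeGen/MachineInstr.h"

    using namespace llvm;

    static void noteInstrNumbers(const MachineFunction &MF) {
      for (const MachineBasicBlock &MBB : MF)
        for (const MachineInstr &MI : MBB)
          if (unsigned Num = MI.peekDebugInstrNum()) // 0 means unnumbered
            (void)Num; // MI participates in debug value tracking
    }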
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp
index fa23df6288e9..eae174019b56 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -220,6 +220,10 @@ void MIRPrinter::print(const MachineFunction &MF) {
convert(MST, YamlMF.FrameInfo, MF.getFrameInfo());
convertStackObjects(YamlMF, MF, MST);
convertCallSiteObjects(YamlMF, MF, MST);
+ for (auto &Sub : MF.DebugValueSubstitutions)
+ YamlMF.DebugValueSubstitutions.push_back({Sub.first.first, Sub.first.second,
+ Sub.second.first,
+ Sub.second.second});
if (const auto *ConstantPool = MF.getConstantPool())
convert(YamlMF, *ConstantPool);
if (const auto *JumpTableInfo = MF.getJumpTableInfo())
@@ -363,9 +367,17 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
ModuleSlotTracker &MST) {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
// Process fixed stack objects.
+ assert(YMF.FixedStackObjects.empty());
+ SmallVector<int, 32> FixedStackObjectsIdx;
+ const int BeginIdx = MFI.getObjectIndexBegin();
+ if (BeginIdx < 0)
+ FixedStackObjectsIdx.reserve(-BeginIdx);
+
unsigned ID = 0;
- for (int I = MFI.getObjectIndexBegin(); I < 0; ++I, ++ID) {
+ for (int I = BeginIdx; I < 0; ++I, ++ID) {
+ FixedStackObjectsIdx.push_back(-1); // Fill index for a possibly dead object.
if (MFI.isDeadObjectIndex(I))
continue;
@@ -380,14 +392,22 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
YamlObject.StackID = (TargetStackID::Value)MFI.getStackID(I);
YamlObject.IsImmutable = MFI.isImmutableObjectIndex(I);
YamlObject.IsAliased = MFI.isAliasedObjectIndex(I);
+ // Save the ID's position in the FixedStackObjects storage vector.
+ FixedStackObjectsIdx[ID] = YMF.FixedStackObjects.size();
YMF.FixedStackObjects.push_back(YamlObject);
StackObjectOperandMapping.insert(
std::make_pair(I, FrameIndexOperand::createFixed(ID)));
}
// Process ordinary stack objects.
+ assert(YMF.StackObjects.empty());
+ SmallVector<unsigned, 32> StackObjectsIdx;
+ const int EndIdx = MFI.getObjectIndexEnd();
+ if (EndIdx > 0)
+ StackObjectsIdx.reserve(EndIdx);
ID = 0;
- for (int I = 0, E = MFI.getObjectIndexEnd(); I < E; ++I, ++ID) {
+ for (int I = 0; I < EndIdx; ++I, ++ID) {
+ StackObjectsIdx.push_back(-1); // Fill index for a possibly dead object.
if (MFI.isDeadObjectIndex(I))
continue;
@@ -395,7 +415,7 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
YamlObject.ID = ID;
if (const auto *Alloca = MFI.getObjectAllocation(I))
YamlObject.Name.Value = std::string(
- Alloca->hasName() ? Alloca->getName() : "<unnamed alloca>");
+ Alloca->hasName() ? Alloca->getName() : "");
YamlObject.Type = MFI.isSpillSlotObjectIndex(I)
? yaml::MachineStackObject::SpillSlot
: MFI.isVariableSizedObjectIndex(I)
@@ -406,41 +426,42 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
YamlObject.Alignment = MFI.getObjectAlign(I);
YamlObject.StackID = (TargetStackID::Value)MFI.getStackID(I);
+ // Save the ID's position in the StackObjects storage vector.
+ StackObjectsIdx[ID] = YMF.StackObjects.size();
YMF.StackObjects.push_back(YamlObject);
StackObjectOperandMapping.insert(std::make_pair(
I, FrameIndexOperand::create(YamlObject.Name.Value, ID)));
}
for (const auto &CSInfo : MFI.getCalleeSavedInfo()) {
- if (!CSInfo.isSpilledToReg() && MFI.isDeadObjectIndex(CSInfo.getFrameIdx()))
+ const int FrameIdx = CSInfo.getFrameIdx();
+ if (!CSInfo.isSpilledToReg() && MFI.isDeadObjectIndex(FrameIdx))
continue;
yaml::StringValue Reg;
printRegMIR(CSInfo.getReg(), Reg, TRI);
if (!CSInfo.isSpilledToReg()) {
- auto StackObjectInfo = StackObjectOperandMapping.find(CSInfo.getFrameIdx());
- assert(StackObjectInfo != StackObjectOperandMapping.end() &&
+ assert(FrameIdx >= MFI.getObjectIndexBegin() &&
+ FrameIdx < MFI.getObjectIndexEnd() &&
"Invalid stack object index");
- const FrameIndexOperand &StackObject = StackObjectInfo->second;
- if (StackObject.IsFixed) {
- YMF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg;
- YMF.FixedStackObjects[StackObject.ID].CalleeSavedRestored =
- CSInfo.isRestored();
+ if (FrameIdx < 0) { // Negative index means fixed objects.
+ auto &Object =
+ YMF.FixedStackObjects
+ [FixedStackObjectsIdx[FrameIdx + MFI.getNumFixedObjects()]];
+ Object.CalleeSavedRegister = Reg;
+ Object.CalleeSavedRestored = CSInfo.isRestored();
} else {
- YMF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg;
- YMF.StackObjects[StackObject.ID].CalleeSavedRestored =
- CSInfo.isRestored();
+ auto &Object = YMF.StackObjects[StackObjectsIdx[FrameIdx]];
+ Object.CalleeSavedRegister = Reg;
+ Object.CalleeSavedRestored = CSInfo.isRestored();
}
}
}
for (unsigned I = 0, E = MFI.getLocalFrameObjectCount(); I < E; ++I) {
auto LocalObject = MFI.getLocalFrameObjectMap(I);
- auto StackObjectInfo = StackObjectOperandMapping.find(LocalObject.first);
- assert(StackObjectInfo != StackObjectOperandMapping.end() &&
- "Invalid stack object index");
- const FrameIndexOperand &StackObject = StackObjectInfo->second;
- assert(!StackObject.IsFixed && "Expected a locally mapped stack object");
- YMF.StackObjects[StackObject.ID].LocalOffset = LocalObject.second;
+ assert(LocalObject.first >= 0 && "Expected a locally mapped stack object");
+ YMF.StackObjects[StackObjectsIdx[LocalObject.first]].LocalOffset =
+ LocalObject.second;
}
// Print the stack object references in the frame information class after
@@ -454,15 +475,16 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
// Print the debug variable information.
for (const MachineFunction::VariableDbgInfo &DebugVar :
MF.getVariableDbgInfo()) {
- auto StackObjectInfo = StackObjectOperandMapping.find(DebugVar.Slot);
- assert(StackObjectInfo != StackObjectOperandMapping.end() &&
+ assert(DebugVar.Slot >= MFI.getObjectIndexBegin() &&
+ DebugVar.Slot < MFI.getObjectIndexEnd() &&
"Invalid stack object index");
- const FrameIndexOperand &StackObject = StackObjectInfo->second;
- if (StackObject.IsFixed) {
- auto &Object = YMF.FixedStackObjects[StackObject.ID];
+ if (DebugVar.Slot < 0) { // Negative index means fixed objects.
+ auto &Object =
+ YMF.FixedStackObjects[FixedStackObjectsIdx[DebugVar.Slot +
+ MFI.getNumFixedObjects()]];
printStackObjectDbgInfo(DebugVar, Object, MST);
} else {
- auto &Object = YMF.StackObjects[StackObject.ID];
+ auto &Object = YMF.StackObjects[StackObjectsIdx[DebugVar.Slot]];
printStackObjectDbgInfo(DebugVar, Object, MST);
}
}
@@ -608,58 +630,10 @@ bool MIPrinter::canPredictSuccessors(const MachineBasicBlock &MBB) const {
void MIPrinter::print(const MachineBasicBlock &MBB) {
assert(MBB.getNumber() >= 0 && "Invalid MBB number");
- OS << "bb." << MBB.getNumber();
- bool HasAttributes = false;
- if (const auto *BB = MBB.getBasicBlock()) {
- if (BB->hasName()) {
- OS << "." << BB->getName();
- } else {
- HasAttributes = true;
- OS << " (";
- int Slot = MST.getLocalSlot(BB);
- if (Slot == -1)
- OS << "<ir-block badref>";
- else
- OS << (Twine("%ir-block.") + Twine(Slot)).str();
- }
- }
- if (MBB.hasAddressTaken()) {
- OS << (HasAttributes ? ", " : " (");
- OS << "address-taken";
- HasAttributes = true;
- }
- if (MBB.isEHPad()) {
- OS << (HasAttributes ? ", " : " (");
- OS << "landing-pad";
- HasAttributes = true;
- }
- if (MBB.isEHFuncletEntry()) {
- OS << (HasAttributes ? ", " : " (");
- OS << "ehfunclet-entry";
- HasAttributes = true;
- }
- if (MBB.getAlignment() != Align(1)) {
- OS << (HasAttributes ? ", " : " (");
- OS << "align " << MBB.getAlignment().value();
- HasAttributes = true;
- }
- if (MBB.getSectionID() != MBBSectionID(0)) {
- OS << (HasAttributes ? ", " : " (");
- OS << "bbsections ";
- switch (MBB.getSectionID().Type) {
- case MBBSectionID::SectionType::Exception:
- OS << "Exception";
- break;
- case MBBSectionID::SectionType::Cold:
- OS << "Cold";
- break;
- default:
- OS << MBB.getSectionID().Number;
- }
- HasAttributes = true;
- }
- if (HasAttributes)
- OS << ")";
+ MBB.printName(OS,
+ MachineBasicBlock::PrintNameIr |
+ MachineBasicBlock::PrintNameAttributes,
+ &MST);
OS << ":\n";
bool HasLineAttributes = false;
@@ -818,6 +792,13 @@ void MIPrinter::print(const MachineInstr &MI) {
NeedComma = true;
}
+ if (auto Num = MI.peekDebugInstrNum()) {
+ if (NeedComma)
+ OS << ',';
+ OS << " debug-instr-number " << Num;
+ NeedComma = true;
+ }
+
if (PrintLocations) {
if (const DebugLoc &DL = MI.getDebugLoc()) {
if (NeedComma)
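The stack-object rework above replaces the StackObjectOperandMapping
lookups with two side vectors, because dead objects leave holes in the
YAML arrays: FixedStackObjectsIdx and StackObjectsIdx record, per frame
index, the position of its YAML entry, with -1 marking a dead object.
Fixed objects live at negative frame indices and are biased by the
fixed-object count. The translation rule in isolation (names and state
from the patch):

    // Map a frame index FI to its slot in the YAML object arrays.
    int NumFixed = MFI.getNumFixedObjects();
    auto YamlSlot = [&](int FI) -> int {
      if (FI < 0) // fixed objects occupy [-NumFixed, -1]
        return FixedStackObjectsIdx[FI + NumFixed];
      return (int)StackObjectsIdx[FI];
    };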
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
index 54441301d65b..3d4f66f31174 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
@@ -8,6 +8,7 @@
#include "MIRVRegNamerUtils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineStableHash.h"
#include "llvm/IR/Constants.h"
#include "llvm/Support/Debug.h"
@@ -15,6 +16,11 @@ using namespace llvm;
#define DEBUG_TYPE "mir-vregnamer-utils"
+static cl::opt<bool>
+ UseStableNamerHash("mir-vreg-namer-use-stable-hash", cl::init(false),
+ cl::Hidden,
+ cl::desc("Use Stable Hashing for MIR VReg Renaming"));
+
using VRegRenameMap = std::map<unsigned, unsigned>;
bool VRegRenamer::doVRegRenaming(const VRegRenameMap &VRM) {
@@ -52,6 +58,14 @@ std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) {
std::string S;
raw_string_ostream OS(S);
+ if (UseStableNamerHash) {
+ auto Hash = stableHashValue(MI, /* HashVRegs */ true,
+ /* HashConstantPoolIndices */ true,
+ /* HashMemOperands */ true);
+ assert(Hash && "Expected non-zero Hash");
+ return std::to_string(Hash).substr(0, 5);
+ }
+
// Gets a hashable artifact from a given MachineOperand (ie an unsigned).
auto GetHashableMO = [this](const MachineOperand &MO) -> unsigned {
switch (MO.getType()) {
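Behind the new -mir-vreg-namer-use-stable-hash flag, virtual-register
names are derived from stableHashValue over the instruction (including
vregs, constant-pool indices and memory operands, per the call above),
so the names reproduce across runs and hosts. Only the first five
decimal digits are kept; the truncation in isolation:

    #include <cstdint>
    #include <string>

    // Shorten a 64-bit stable hash to the namer's five-digit prefix;
    // substr is safe even when the decimal form is shorter than five.
    static std::string shortHashName(uint64_t Hash) {
      return std::to_string(Hash).substr(0, 5);
    }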
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 2d4b60435d96..b4187af02975 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -60,38 +60,25 @@ MCSymbol *MachineBasicBlock::getSymbol() const {
if (!CachedMCSymbol) {
const MachineFunction *MF = getParent();
MCContext &Ctx = MF->getContext();
- auto Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix();
- assert(getNumber() >= 0 && "cannot get label for unreachable MBB");
-
- // We emit a non-temporary symbol for every basic block if we have BBLabels
- // or -- with basic block sections -- when a basic block begins a section.
- // With basic block symbols, we use a unary encoding which can
- // compress the symbol names significantly. For basic block sections where
- // this block is the first in a cluster, we use a non-temp descriptive name.
- // Otherwise we fall back to use temp label.
- if (MF->hasBBLabels()) {
- auto Iter = MF->getBBSectionsSymbolPrefix().begin();
- if (getNumber() < 0 ||
- getNumber() >= (int)MF->getBBSectionsSymbolPrefix().size())
- report_fatal_error("Unreachable MBB: " + Twine(getNumber()));
- // The basic blocks for function foo are named a.BB.foo, aa.BB.foo, and
- // so on.
- std::string Prefix(Iter + 1, Iter + getNumber() + 1);
- std::reverse(Prefix.begin(), Prefix.end());
- CachedMCSymbol =
- Ctx.getOrCreateSymbol(Twine(Prefix) + ".BB." + Twine(MF->getName()));
- } else if (MF->hasBBSections() && isBeginSection()) {
+ // We emit a non-temporary symbol -- with a descriptive name -- if it begins
+ // a section (with basic block sections). Otherwise we fall back to a
+ // temporary label.
+ if (MF->hasBBSections() && isBeginSection()) {
SmallString<5> Suffix;
if (SectionID == MBBSectionID::ColdSectionID) {
Suffix += ".cold";
} else if (SectionID == MBBSectionID::ExceptionSectionID) {
Suffix += ".eh";
} else {
- Suffix += "." + std::to_string(SectionID.Number);
+ // For symbols that represent basic block sections, we add ".__part." to
+ // allow tools like symbolizers to know that this represents a part of
+ // the original function.
+ Suffix = (Suffix + Twine(".__part.") + Twine(SectionID.Number)).str();
}
CachedMCSymbol = Ctx.getOrCreateSymbol(MF->getName() + Suffix);
} else {
+ const StringRef Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix();
CachedMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB" +
Twine(MF->getFunctionNumber()) +
"_" + Twine(getNumber()));
@@ -100,6 +87,17 @@ MCSymbol *MachineBasicBlock::getSymbol() const {
return CachedMCSymbol;
}
+MCSymbol *MachineBasicBlock::getEndSymbol() const {
+ if (!CachedEndMCSymbol) {
+ const MachineFunction *MF = getParent();
+ MCContext &Ctx = MF->getContext();
+ auto Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix();
+ CachedEndMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB_END" +
+ Twine(MF->getFunctionNumber()) +
+ "_" + Twine(getNumber()));
+ }
+ return CachedEndMCSymbol;
+}
raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) {
MBB.print(OS);
@@ -271,6 +269,10 @@ bool MachineBasicBlock::hasEHPadSuccessor() const {
return false;
}
+bool MachineBasicBlock::isEntryBlock() const {
+ return getParent()->begin() == getIterator();
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MachineBasicBlock::dump() const {
print(dbgs());
@@ -338,39 +340,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (Indexes && PrintSlotIndexes)
OS << Indexes->getMBBStartIdx(this) << '\t';
- OS << "bb." << getNumber();
- bool HasAttributes = false;
- if (const auto *BB = getBasicBlock()) {
- if (BB->hasName()) {
- OS << "." << BB->getName();
- } else {
- HasAttributes = true;
- OS << " (";
- int Slot = MST.getLocalSlot(BB);
- if (Slot == -1)
- OS << "<ir-block badref>";
- else
- OS << (Twine("%ir-block.") + Twine(Slot)).str();
- }
- }
-
- if (hasAddressTaken()) {
- OS << (HasAttributes ? ", " : " (");
- OS << "address-taken";
- HasAttributes = true;
- }
- if (isEHPad()) {
- OS << (HasAttributes ? ", " : " (");
- OS << "landing-pad";
- HasAttributes = true;
- }
- if (getAlignment() != Align(1)) {
- OS << (HasAttributes ? ", " : " (");
- OS << "align " << Log2(getAlignment());
- HasAttributes = true;
- }
- if (HasAttributes)
- OS << ")";
+ printName(OS, PrintNameIr | PrintNameAttributes, &MST);
OS << ":\n";
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
@@ -383,11 +353,9 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (Indexes) OS << '\t';
// Don't indent(2), align with previous line attributes.
OS << "; predecessors: ";
- for (auto I = pred_begin(), E = pred_end(); I != E; ++I) {
- if (I != pred_begin())
- OS << ", ";
- OS << printMBBReference(**I);
- }
+ ListSeparator LS;
+ for (auto *Pred : predecessors())
+ OS << LS << printMBBReference(*Pred);
OS << '\n';
HasLineAttributes = true;
}
@@ -396,10 +364,9 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (Indexes) OS << '\t';
// Print the successors
OS.indent(2) << "successors: ";
+ ListSeparator LS;
for (auto I = succ_begin(), E = succ_end(); I != E; ++I) {
- if (I != succ_begin())
- OS << ", ";
- OS << printMBBReference(**I);
+ OS << LS << printMBBReference(**I);
if (!Probs.empty())
OS << '('
<< format("0x%08" PRIx32, getSuccProbability(I).getNumerator())
@@ -408,11 +375,10 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (!Probs.empty() && IsStandalone) {
// Print human readable probabilities as comments.
OS << "; ";
+ ListSeparator LS;
for (auto I = succ_begin(), E = succ_end(); I != E; ++I) {
const BranchProbability &BP = getSuccProbability(I);
- if (I != succ_begin())
- OS << ", ";
- OS << printMBBReference(**I) << '('
+ OS << LS << printMBBReference(**I) << '('
<< format("%.2f%%",
rint(((double)BP.getNumerator() / BP.getDenominator()) *
100.0 * 100.0) /
@@ -429,12 +395,9 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (Indexes) OS << '\t';
OS.indent(2) << "liveins: ";
- bool First = true;
+ ListSeparator LS;
for (const auto &LI : liveins()) {
- if (!First)
- OS << ", ";
- First = false;
- OS << printReg(LI.PhysReg, TRI);
+ OS << LS << printReg(LI.PhysReg, TRI);
if (!LI.LaneMask.all())
OS << ":0x" << PrintLaneMask(LI.LaneMask);
}
@@ -478,9 +441,99 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
}
}
+/// Print the basic block's name as:
+///
+/// bb.{number}[.{ir-name}] [(attributes...)]
+///
+/// The {ir-name} is only printed when the \ref PrintNameIr flag is passed
+/// (which is the default). If the IR block has no name, it is identified
+/// numerically using the attribute syntax as "(%ir-block.{ir-slot})".
+///
+/// When the \ref PrintNameAttributes flag is passed, additional attributes
+/// of the block are printed when set.
+///
+/// \param printNameFlags Combination of \ref PrintNameFlag flags indicating
+/// the parts to print.
+/// \param moduleSlotTracker Optional ModuleSlotTracker. This method will
+/// incorporate its own tracker when necessary to
+/// determine the block's IR name.
+void MachineBasicBlock::printName(raw_ostream &os, unsigned printNameFlags,
+ ModuleSlotTracker *moduleSlotTracker) const {
+ os << "bb." << getNumber();
+ bool hasAttributes = false;
+
+ if (printNameFlags & PrintNameIr) {
+ if (const auto *bb = getBasicBlock()) {
+ if (bb->hasName()) {
+ os << '.' << bb->getName();
+ } else {
+ hasAttributes = true;
+ os << " (";
+
+ int slot = -1;
+
+ if (moduleSlotTracker) {
+ slot = moduleSlotTracker->getLocalSlot(bb);
+ } else if (bb->getParent()) {
+ ModuleSlotTracker tmpTracker(bb->getModule(), false);
+ tmpTracker.incorporateFunction(*bb->getParent());
+ slot = tmpTracker.getLocalSlot(bb);
+ }
+
+ if (slot == -1)
+ os << "<ir-block badref>";
+ else
+ os << (Twine("%ir-block.") + Twine(slot)).str();
+ }
+ }
+ }
+
+ if (printNameFlags & PrintNameAttributes) {
+ if (hasAddressTaken()) {
+ os << (hasAttributes ? ", " : " (");
+ os << "address-taken";
+ hasAttributes = true;
+ }
+ if (isEHPad()) {
+ os << (hasAttributes ? ", " : " (");
+ os << "landing-pad";
+ hasAttributes = true;
+ }
+ if (isEHFuncletEntry()) {
+ os << (hasAttributes ? ", " : " (");
+ os << "ehfunclet-entry";
+ hasAttributes = true;
+ }
+ if (getAlignment() != Align(1)) {
+ os << (hasAttributes ? ", " : " (");
+ os << "align " << getAlignment().value();
+ hasAttributes = true;
+ }
+ if (getSectionID() != MBBSectionID(0)) {
+ os << (hasAttributes ? ", " : " (");
+ os << "bbsections ";
+ switch (getSectionID().Type) {
+ case MBBSectionID::SectionType::Exception:
+ os << "Exception";
+ break;
+ case MBBSectionID::SectionType::Cold:
+ os << "Cold";
+ break;
+ default:
+ os << getSectionID().Number;
+ }
+ hasAttributes = true;
+ }
+ }
+
+ if (hasAttributes)
+ os << ')';
+}
+
void MachineBasicBlock::printAsOperand(raw_ostream &OS,
bool /*PrintType*/) const {
- OS << "%bb." << getNumber();
+ OS << '%';
+ printName(OS, 0);
}
void MachineBasicBlock::removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) {
@@ -530,7 +583,7 @@ void MachineBasicBlock::sortUniqueLiveIns() {
Register
MachineBasicBlock::addLiveIn(MCRegister PhysReg, const TargetRegisterClass *RC) {
assert(getParent() && "MBB must be inserted in function");
- assert(PhysReg.isPhysical() && "Expected physreg");
+ assert(Register::isPhysicalRegister(PhysReg) && "Expected physreg");
assert(RC && "Register class is required");
assert((isEHPad() || this == &getParent()->front()) &&
"Only the entry block and landing pads can have physreg live ins");
@@ -696,7 +749,7 @@ void MachineBasicBlock::splitSuccessor(MachineBasicBlock *Old,
bool NormalizeSuccProbs) {
succ_iterator OldI = llvm::find(successors(), Old);
assert(OldI != succ_end() && "Old is not a successor of this block!");
- assert(llvm::find(successors(), New) == succ_end() &&
+ assert(!llvm::is_contained(successors(), New) &&
"New is already a successor of this block!");
// Add a new successor with equal probability as the original one. Note
@@ -775,7 +828,7 @@ void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old,
void MachineBasicBlock::copySuccessor(MachineBasicBlock *Orig,
succ_iterator I) {
- if (Orig->Probs.empty())
+ if (!Orig->Probs.empty())
addSuccessor(*I, Orig->getSuccProbability(I));
else
addSuccessorWithoutProb(*I);
@@ -891,6 +944,47 @@ bool MachineBasicBlock::canFallThrough() {
return getFallThrough() != nullptr;
}
+MachineBasicBlock *MachineBasicBlock::splitAt(MachineInstr &MI,
+ bool UpdateLiveIns,
+ LiveIntervals *LIS) {
+ MachineBasicBlock::iterator SplitPoint(&MI);
+ ++SplitPoint;
+
+ if (SplitPoint == end()) {
+ // Don't bother with a new block.
+ return this;
+ }
+
+ MachineFunction *MF = getParent();
+
+ LivePhysRegs LiveRegs;
+ if (UpdateLiveIns) {
+ // Make sure we add any physregs we define in the block as liveins to the
+ // new block.
+ MachineBasicBlock::iterator Prev(&MI);
+ LiveRegs.init(*MF->getSubtarget().getRegisterInfo());
+ LiveRegs.addLiveOuts(*this);
+ for (auto I = rbegin(), E = Prev.getReverse(); I != E; ++I)
+ LiveRegs.stepBackward(*I);
+ }
+
+ MachineBasicBlock *SplitBB = MF->CreateMachineBasicBlock(getBasicBlock());
+
+ MF->insert(++MachineFunction::iterator(this), SplitBB);
+ SplitBB->splice(SplitBB->begin(), this, SplitPoint, end());
+
+ SplitBB->transferSuccessorsAndUpdatePHIs(this);
+ addSuccessor(SplitBB);
+
+ if (UpdateLiveIns)
+ addLiveIns(*SplitBB, LiveRegs);
+
+ if (LIS)
+ LIS->insertMBBInMaps(SplitBB);
+
+ return SplitBB;
+}
+
MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
MachineBasicBlock *Succ, Pass &P,
std::vector<SparseBitVector<>> *LiveInSets) {
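This file batches several independent changes: block-name printing moves
into printName (shared with the MIR printer), getEndSymbol and
isEntryBlock are new, splitAt splits a block after a given instruction
and can update live-ins and LiveIntervals, and copySuccessor fixes an
inverted test so a probability is copied only when the source block
actually tracks probabilities. The manual first-element bookkeeping in
print gives way to ListSeparator, which emits nothing before the first
item and the separator before each later one:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/StringExtras.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    // Prints "1, 2, 3" with no explicit first-iteration flag.
    static void printList(raw_ostream &OS, ArrayRef<int> Vals) {
      ListSeparator LS; // separator defaults to ", "
      for (int V : Vals)
        OS << LS << V;
    }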
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index 1168b01a835f..54e0a14e0555 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -241,16 +241,21 @@ MachineBlockFrequencyInfo::getProfileCountFromFreq(uint64_t Freq) const {
return MBFI ? MBFI->getProfileCountFromFreq(F, Freq) : None;
}
-bool
-MachineBlockFrequencyInfo::isIrrLoopHeader(const MachineBasicBlock *MBB) {
+bool MachineBlockFrequencyInfo::isIrrLoopHeader(
+ const MachineBasicBlock *MBB) const {
assert(MBFI && "Expected analysis to be available");
return MBFI->isIrrLoopHeader(MBB);
}
-void MachineBlockFrequencyInfo::setBlockFreq(const MachineBasicBlock *MBB,
- uint64_t Freq) {
+void MachineBlockFrequencyInfo::onEdgeSplit(
+ const MachineBasicBlock &NewPredecessor,
+ const MachineBasicBlock &NewSuccessor,
+ const MachineBranchProbabilityInfo &MBPI) {
assert(MBFI && "Expected analysis to be available");
- MBFI->setBlockFreq(MBB, Freq);
+ auto NewSuccFreq = MBFI->getBlockFreq(&NewPredecessor) *
+ MBPI.getEdgeProbability(&NewPredecessor, &NewSuccessor);
+
+ MBFI->setBlockFreq(&NewSuccessor, NewSuccFreq.getFrequency());
}
const MachineFunction *MachineBlockFrequencyInfo::getFunction() const {
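setBlockFreq leaves the public surface in favor of onEdgeSplit, which
encodes the one update rule its callers need: a block inserted on the
Pred->Succ edge executes as often as that edge is taken. The arithmetic
in isolation (names from the patch):

    // freq(NewSucc) = freq(NewPred) * prob(NewPred -> NewSucc)
    BlockFrequency NewSuccFreq =
        MBFI.getBlockFreq(&NewPredecessor) *
        MBPI.getEdgeProbability(&NewPredecessor, &NewSuccessor);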
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 783d22fafee9..048baa460e49 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -177,6 +177,14 @@ static cl::opt<unsigned> TailDupPlacementPenalty(
cl::init(2),
cl::Hidden);
+// Heuristic for tail duplication if profile count is used in cost model.
+static cl::opt<unsigned> TailDupProfilePercentThreshold(
+ "tail-dup-profile-percent-threshold",
+ cl::desc("If profile count information is used in tail duplication cost "
+ "model, the gained fall through number from tail duplication "
+ "should be at least this percent of hot count."),
+ cl::init(50), cl::Hidden);
+
// Heuristic for triangle chains.
static cl::opt<unsigned> TriangleChainCount(
"triangle-chain-count",
@@ -377,6 +385,10 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// Partial tail duplication threshold.
BlockFrequency DupThreshold;
+ /// True: use block profile count to compute tail duplication cost.
+ /// False: use block frequency to compute tail duplication cost.
+ bool UseProfileCount;
+
/// Allocator and owner of BlockChain structures.
///
/// We build BlockChains lazily while processing the loop structure of
@@ -402,6 +414,19 @@ class MachineBlockPlacement : public MachineFunctionPass {
SmallPtrSet<MachineBasicBlock *, 4> BlocksWithUnanalyzableExits;
#endif
+ /// Get block profile count or frequency according to UseProfileCount.
+ /// The return value is used to model tail duplication cost.
+ BlockFrequency getBlockCountOrFrequency(const MachineBasicBlock *BB) {
+ if (UseProfileCount) {
+ auto Count = MBFI->getBlockProfileCount(BB);
+ if (Count)
+ return *Count;
+ else
+ return 0;
+ } else
+ return MBFI->getBlockFreq(BB);
+ }
+
/// Scale the DupThreshold according to basic block size.
BlockFrequency scaleThreshold(MachineBasicBlock *BB);
void initDupThreshold();
@@ -424,10 +449,6 @@ class MachineBlockPlacement : public MachineFunctionPass {
const MachineBasicBlock *BB, const BlockChain &Chain,
const BlockFilterSet *BlockFilter,
SmallVector<MachineBasicBlock *, 4> &Successors);
- bool shouldPredBlockBeOutlined(
- const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
- const BlockChain &Chain, const BlockFilterSet *BlockFilter,
- BranchProbability SuccProb, BranchProbability HotProb);
bool isBestSuccessor(MachineBasicBlock *BB, MachineBasicBlock *Pred,
BlockFilterSet *BlockFilter);
void findDuplicateCandidates(SmallVectorImpl<MachineBasicBlock *> &Candidates,
@@ -1652,11 +1673,9 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
// worklist of already placed entries.
// FIXME: If this shows up on profiles, it could be folded (at the cost of
// some code complexity) into the loop below.
- WorkList.erase(llvm::remove_if(WorkList,
- [&](MachineBasicBlock *BB) {
- return BlockToChain.lookup(BB) == &Chain;
- }),
- WorkList.end());
+ llvm::erase_if(WorkList, [&](MachineBasicBlock *BB) {
+ return BlockToChain.lookup(BB) == &Chain;
+ });
if (WorkList.empty())
return nullptr;
@@ -2287,6 +2306,10 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
if (Bottom == ExitingBB)
return;
+ // The entry block should always be the first BB in a function.
+ if (Top->isEntryBlock())
+ return;
+
bool ViableTopFallthrough = hasViableTopFallthrough(Top, LoopBlockSet);
// If the header has viable fallthrough, check whether the current loop
@@ -2361,6 +2384,11 @@ void MachineBlockPlacement::rotateLoopWithProfile(
BlockChain &LoopChain, const MachineLoop &L,
const BlockFilterSet &LoopBlockSet) {
auto RotationPos = LoopChain.end();
+ MachineBasicBlock *ChainHeaderBB = *LoopChain.begin();
+
+ // The entry block should always be the first BB in a function.
+ if (ChainHeaderBB->isEntryBlock())
+ return;
BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency();
@@ -2379,7 +2407,6 @@ void MachineBlockPlacement::rotateLoopWithProfile(
// chain head is not the loop header. As we only consider natural loops with
// single header, this computation can be done only once.
BlockFrequency HeaderFallThroughCost(0);
- MachineBasicBlock *ChainHeaderBB = *LoopChain.begin();
for (auto *Pred : ChainHeaderBB->predecessors()) {
BlockChain *PredChain = BlockToChain[Pred];
if (!LoopBlockSet.count(Pred) &&
@@ -2516,10 +2543,14 @@ MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) {
MBPI->getEdgeProbability(LoopPred, L.getHeader());
for (MachineBasicBlock *LoopBB : L.getBlocks()) {
+ if (LoopBlockSet.count(LoopBB))
+ continue;
auto Freq = MBFI->getBlockFreq(LoopBB).getFrequency();
if (Freq == 0 || LoopFreq.getFrequency() / Freq > LoopToColdBlockRatio)
continue;
- LoopBlockSet.insert(LoopBB);
+ BlockChain *Chain = BlockToChain[LoopBB];
+ for (MachineBasicBlock *ChainBB : *Chain)
+ LoopBlockSet.insert(ChainBB);
}
} else
LoopBlockSet.insert(L.block_begin(), L.block_end());
@@ -3011,12 +3042,7 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
SmallVectorImpl<MachineBasicBlock *> &RemoveList = BlockWorkList;
if (RemBB->isEHPad())
RemoveList = EHPadWorkList;
- RemoveList.erase(
- llvm::remove_if(RemoveList,
- [RemBB](MachineBasicBlock *BB) {
- return BB == RemBB;
- }),
- RemoveList.end());
+ llvm::erase_value(RemoveList, RemBB);
}
// Handle the filter set
@@ -3120,7 +3146,7 @@ bool MachineBlockPlacement::isBestSuccessor(MachineBasicBlock *BB,
// Compute the number of reduced taken branches if Pred falls through to BB
// instead of another successor. Then compare it with threshold.
- BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
+ BlockFrequency PredFreq = getBlockCountOrFrequency(Pred);
BlockFrequency Gain = PredFreq * (BBProb - BestProb);
return Gain > scaleThreshold(BB);
}
@@ -3134,8 +3160,8 @@ void MachineBlockPlacement::findDuplicateCandidates(
MachineBasicBlock *Fallthrough = nullptr;
BranchProbability DefaultBranchProb = BranchProbability::getZero();
BlockFrequency BBDupThreshold(scaleThreshold(BB));
- SmallVector<MachineBasicBlock *, 8> Preds(BB->pred_begin(), BB->pred_end());
- SmallVector<MachineBasicBlock *, 8> Succs(BB->succ_begin(), BB->succ_end());
+ SmallVector<MachineBasicBlock *, 8> Preds(BB->predecessors());
+ SmallVector<MachineBasicBlock *, 8> Succs(BB->successors());
// Sort for highest frequency.
auto CmpSucc = [&](MachineBasicBlock *A, MachineBasicBlock *B) {
@@ -3194,7 +3220,7 @@ void MachineBlockPlacement::findDuplicateCandidates(
// it. But it can beneficially fall through to BB, and duplicate BB into other
// predecessors.
for (MachineBasicBlock *Pred : Preds) {
- BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
+ BlockFrequency PredFreq = getBlockCountOrFrequency(Pred);
if (!TailDup.canTailDuplicate(BB, Pred)) {
// BB can't be duplicated into Pred, but it is possible to be layout
@@ -3243,6 +3269,15 @@ void MachineBlockPlacement::initDupThreshold() {
if (!F->getFunction().hasProfileData())
return;
+ // We prefer to use the profile count.
+ uint64_t HotThreshold = PSI->getOrCompHotCountThreshold();
+ if (HotThreshold != UINT64_MAX) {
+ UseProfileCount = true;
+ DupThreshold = HotThreshold * TailDupProfilePercentThreshold / 100;
+ return;
+ }
+
+ // Profile count is not available, so we use block frequency instead.
BlockFrequency MaxFreq = 0;
for (MachineBasicBlock &MBB : *F) {
BlockFrequency Freq = MBFI->getBlockFreq(&MBB);
@@ -3250,10 +3285,9 @@ void MachineBlockPlacement::initDupThreshold() {
MaxFreq = Freq;
}
- // FIXME: we may use profile count instead of frequency,
- // and need more fine tuning.
BranchProbability ThresholdProb(TailDupPlacementPenalty, 100);
DupThreshold = MaxFreq * ThresholdProb;
+ UseProfileCount = false;
}
bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
@@ -3326,8 +3360,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
// No tail merging opportunities if the block number is less than four.
if (MF.size() > 3 && EnableTailMerge) {
unsigned TailMergeSize = TailDupSize + 1;
- BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI,
- *MBPI, PSI, TailMergeSize);
+ BranchFolder BF(/*DefaultEnableTailMerge=*/true, /*CommonHoist=*/false,
+ *MBFI, *MBPI, PSI, TailMergeSize);
if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), MLI,
/*AfterPlacement=*/true)) {
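When profile data is present, the tail-duplication cost model now works
in raw profile counts instead of block frequencies: the duplication
threshold becomes a percentage (default 50, via
-tail-dup-profile-percent-threshold) of PSI's hot-count threshold, and
getBlockCountOrFrequency feeds counts into the same comparisons. A
worked example with illustrative numbers:

    // Illustrative values; HotThreshold actually comes from
    // PSI->getOrCompHotCountThreshold().
    uint64_t HotThreshold = 8000;
    unsigned Percent = 50; // -tail-dup-profile-percent-threshold
    uint64_t DupThreshold = HotThreshold * Percent / 100; // = 4000
    // Duplicating BB into Pred pays off only when the gained fall-through
    // count, PredFreq * (BBProb - BestProb), exceeds this threshold.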
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp
index 09531276bc10..199fe2dc6454 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp
@@ -35,6 +35,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegister.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
@@ -115,18 +116,18 @@ namespace {
bool PerformTrivialCopyPropagation(MachineInstr *MI,
MachineBasicBlock *MBB);
- bool isPhysDefTriviallyDead(unsigned Reg,
+ bool isPhysDefTriviallyDead(MCRegister Reg,
MachineBasicBlock::const_iterator I,
MachineBasicBlock::const_iterator E) const;
bool hasLivePhysRegDefUses(const MachineInstr *MI,
const MachineBasicBlock *MBB,
- SmallSet<unsigned, 8> &PhysRefs,
+ SmallSet<MCRegister, 8> &PhysRefs,
PhysDefVector &PhysDefs, bool &PhysUseDef) const;
bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
- SmallSet<unsigned, 8> &PhysRefs,
+ SmallSet<MCRegister, 8> &PhysRefs,
PhysDefVector &PhysDefs, bool &NonLocal) const;
bool isCSECandidate(MachineInstr *MI);
- bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
+ bool isProfitableToCSE(Register CSReg, Register Reg,
MachineBasicBlock *CSBB, MachineInstr *MI);
void EnterScope(MachineBasicBlock *MBB);
void ExitScope(MachineBasicBlock *MBB);
@@ -218,10 +219,9 @@ bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
return Changed;
}
-bool
-MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
- MachineBasicBlock::const_iterator I,
- MachineBasicBlock::const_iterator E) const {
+bool MachineCSE::isPhysDefTriviallyDead(
+ MCRegister Reg, MachineBasicBlock::const_iterator I,
+ MachineBasicBlock::const_iterator E) const {
unsigned LookAheadLeft = LookAheadLimit;
while (LookAheadLeft) {
// Skip over dbg_value's.
@@ -255,7 +255,7 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
return false;
}
-static bool isCallerPreservedOrConstPhysReg(unsigned Reg,
+static bool isCallerPreservedOrConstPhysReg(MCRegister Reg,
const MachineFunction &MF,
const TargetRegisterInfo &TRI) {
// MachineRegisterInfo::isConstantPhysReg directly called by
@@ -276,7 +276,7 @@ static bool isCallerPreservedOrConstPhysReg(unsigned Reg,
/// instruction does not uses a physical register.
bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
const MachineBasicBlock *MBB,
- SmallSet<unsigned, 8> &PhysRefs,
+ SmallSet<MCRegister, 8> &PhysRefs,
PhysDefVector &PhysDefs,
bool &PhysUseDef) const {
// First, add all uses to PhysRefs.
@@ -289,7 +289,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
if (Register::isVirtualRegister(Reg))
continue;
// Reading either caller preserved or constant physregs is ok.
- if (!isCallerPreservedOrConstPhysReg(Reg, *MI->getMF(), *TRI))
+ if (!isCallerPreservedOrConstPhysReg(Reg.asMCReg(), *MI->getMF(), *TRI))
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
PhysRefs.insert(*AI);
}
@@ -308,12 +308,12 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
if (Register::isVirtualRegister(Reg))
continue;
// Check against PhysRefs even if the def is "dead".
- if (PhysRefs.count(Reg))
+ if (PhysRefs.count(Reg.asMCReg()))
PhysUseDef = true;
// If the def is dead, it's ok. But the def may not marked "dead". That's
// common since this pass is run before livevariables. We can scan
// forward a few instructions and check if it is obviously dead.
- if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end()))
+ if (!MO.isDead() && !isPhysDefTriviallyDead(Reg.asMCReg(), I, MBB->end()))
PhysDefs.push_back(std::make_pair(MOP.index(), Reg));
}
@@ -327,7 +327,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
}
bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
- SmallSet<unsigned, 8> &PhysRefs,
+ SmallSet<MCRegister, 8> &PhysRefs,
PhysDefVector &PhysDefs,
bool &NonLocal) const {
// For now conservatively returns false if the common subexpression is
@@ -382,7 +382,7 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
Register MOReg = MO.getReg();
if (Register::isVirtualRegister(MOReg))
continue;
- if (PhysRefs.count(MOReg))
+ if (PhysRefs.count(MOReg.asMCReg()))
return false;
}
@@ -429,7 +429,7 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) {
/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
/// common expression that defines Reg. CSBB is basic block where CSReg is
/// defined.
-bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
+bool MachineCSE::isProfitableToCSE(Register CSReg, Register Reg,
MachineBasicBlock *CSBB, MachineInstr *MI) {
// FIXME: Heuristics that work around the lack of live range splitting.
@@ -556,7 +556,7 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
// used, then it's not safe to replace it with a common subexpression.
// It's also not safe if the instruction uses physical registers.
bool CrossMBBPhysDef = false;
- SmallSet<unsigned, 8> PhysRefs;
+ SmallSet<MCRegister, 8> PhysRefs;
PhysDefVector PhysDefs;
bool PhysUseDef = false;
if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
@@ -640,7 +640,7 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
// Actually perform the elimination.
if (DoCSE) {
- for (std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
+ for (const std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
unsigned OldReg = CSEPair.first;
unsigned NewReg = CSEPair.second;
// OldReg may have been unused but is used now, clear the Dead flag
@@ -656,7 +656,7 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
// we should make sure it is not dead at CSMI.
for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
- for (auto PhysDef : PhysDefs)
+ for (const auto &PhysDef : PhysDefs)
if (!MI->getOperand(PhysDef.first).isDead())
CSMI->getOperand(PhysDef.first).setIsDead(false);
@@ -748,8 +748,7 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
Node = WorkList.pop_back_val();
Scopes.push_back(Node);
OpenChildren[Node] = Node->getNumChildren();
- for (MachineDomTreeNode *Child : Node->children())
- WorkList.push_back(Child);
+ append_range(WorkList, Node->children());
} while (!WorkList.empty());
// Now perform CSE.
@@ -777,11 +776,11 @@ bool MachineCSE::isPRECandidate(MachineInstr *MI) {
MI->getNumExplicitDefs() != 1)
return false;
- for (auto def : MI->defs())
+ for (const auto &def : MI->defs())
if (!Register::isVirtualRegister(def.getReg()))
return false;
- for (auto use : MI->uses())
+ for (const auto &use : MI->uses())
if (use.isReg() && !Register::isVirtualRegister(use.getReg()))
return false;
@@ -861,8 +860,7 @@ bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) {
BBs.push_back(DT->getRootNode());
do {
auto Node = BBs.pop_back_val();
- for (MachineDomTreeNode *Child : Node->children())
- BBs.push_back(Child);
+ append_range(BBs, Node->children());
MachineBasicBlock *MBB = Node->getBlock();
Changed |= ProcessBlockPRE(DT, MBB);
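The MachineCSE hunks above swap raw unsigned register numbers for the typed Register and MCRegister wrappers, converting at the virtual/physical boundary with asMCReg(). A minimal sketch of the distinction these types enforce, assuming only llvm/CodeGen/Register.h; the classify() helper is hypothetical:

#include "llvm/CodeGen/Register.h"

using namespace llvm;

// Hypothetical helper illustrating the Register/MCRegister split: a Register
// may name a virtual or a physical register, while an MCRegister can only
// name a physical one, so containers such as SmallSet<MCRegister, 8> cannot
// accidentally be polluted with virtual registers.
static const char *classify(Register R) {
  if (R.isVirtual())
    return "virtual";
  // asMCReg() asserts the register is physical (or null) and narrows the
  // type, mirroring the Reg.asMCReg() calls in the hunks above.
  MCRegister Phys = R.asMCReg();
  return Phys.isValid() ? "physical" : "null";
}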
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCheckDebugify.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCheckDebugify.cpp
new file mode 100644
index 000000000000..bd7f0f862947
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCheckDebugify.cpp
@@ -0,0 +1,126 @@
+//===- MachineCheckDebugify.cpp - Check debug info ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file This checks debug info after mir-debugify (+ pass-to-test). Currently
+/// it simply checks the integrity of line info in DILocation and
+/// DILocalVariable which mir-debugify generated earlier.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/Debugify.h"
+
+#define DEBUG_TYPE "mir-check-debugify"
+
+using namespace llvm;
+
+namespace {
+
+struct CheckDebugMachineModule : public ModulePass {
+ bool runOnModule(Module &M) override {
+ MachineModuleInfo &MMI =
+ getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+
+ NamedMDNode *NMD = M.getNamedMetadata("llvm.mir.debugify");
+ if (!NMD) {
+ errs() << "WARNING: Please run mir-debugify to generate "
+ "llvm.mir.debugify metadata first.\n";
+ return false;
+ }
+
+ auto getDebugifyOperand = [&](unsigned Idx) -> unsigned {
+ return mdconst::extract<ConstantInt>(NMD->getOperand(Idx)->getOperand(0))
+ ->getZExtValue();
+ };
+ assert(NMD->getNumOperands() == 2 &&
+ "llvm.mir.debugify should have exactly 2 operands!");
+ unsigned NumLines = getDebugifyOperand(0);
+ unsigned NumVars = getDebugifyOperand(1);
+ BitVector MissingLines{NumLines, true};
+ BitVector MissingVars{NumVars, true};
+
+ for (Function &F : M.functions()) {
+ MachineFunction *MF = MMI.getMachineFunction(F);
+ if (!MF)
+ continue;
+ for (MachineBasicBlock &MBB : *MF) {
+ // Find missing lines.
+ // TODO: Avoid meta instructions other than dbg_val.
+ for (MachineInstr &MI : MBB) {
+ if (MI.isDebugValue())
+ continue;
+ const DebugLoc DL = MI.getDebugLoc();
+ if (DL && DL.getLine() != 0) {
+ MissingLines.reset(DL.getLine() - 1);
+ continue;
+ }
+
+ if (!DL) {
+ errs() << "WARNING: Instruction with empty DebugLoc in function ";
+ errs() << F.getName() << " --";
+ MI.print(errs());
+ }
+ }
+
+ // Find missing variables.
+ // TODO: Handle DBG_INSTR_REF which is under an experimental option now.
+ for (MachineInstr &MI : MBB) {
+ if (!MI.isDebugValue())
+ continue;
+ const DILocalVariable *LocalVar = MI.getDebugVariable();
+ unsigned Var = ~0U;
+
+ (void)to_integer(LocalVar->getName(), Var, 10);
+ assert(Var <= NumVars && "Unexpected name for DILocalVariable");
+ MissingVars.reset(Var - 1);
+ }
+ }
+ }
+
+ bool Fail = false;
+ for (unsigned Idx : MissingLines.set_bits()) {
+ errs() << "WARNING: Missing line " << Idx + 1 << "\n";
+ Fail = true;
+ }
+
+ for (unsigned Idx : MissingVars.set_bits()) {
+ errs() << "WARNING: Missing variable " << Idx + 1 << "\n";
+ Fail = true;
+ }
+ errs() << "Machine IR debug info check: ";
+ errs() << (Fail ? "FAIL" : "PASS") << "\n";
+
+ return false;
+ }
+
+ CheckDebugMachineModule() : ModulePass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineModuleInfoWrapperPass>();
+ AU.addPreserved<MachineModuleInfoWrapperPass>();
+ AU.setPreservesCFG();
+ }
+
+ static char ID; // Pass identification.
+};
+char CheckDebugMachineModule::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(CheckDebugMachineModule, DEBUG_TYPE,
+ "Machine Check Debug Module", false, false)
+INITIALIZE_PASS_END(CheckDebugMachineModule, DEBUG_TYPE,
+ "Machine Check Debug Module", false, false)
+
+ModulePass *llvm::createCheckDebugMachineModulePass() {
+ return new CheckDebugMachineModule();
+}
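The pass above consumes the two-operand llvm.mir.debugify node that mir-debugify emits (see the MachineDebugify hunk further down). A standalone reader for that layout, mirroring the getDebugifyOperand lambda above; it assumes a loaded Module and returns false instead of asserting on malformed input:

#include "llvm/IR/Constants.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Read the line/variable counts recorded by mir-debugify out of the
// "llvm.mir.debugify" named metadata node.
static bool readDebugifyCounts(Module &M, unsigned &NumLines,
                               unsigned &NumVars) {
  NamedMDNode *NMD = M.getNamedMetadata("llvm.mir.debugify");
  if (!NMD || NMD->getNumOperands() != 2)
    return false;
  auto Get = [&](unsigned Idx) -> unsigned {
    return mdconst::extract<ConstantInt>(NMD->getOperand(Idx)->getOperand(0))
        ->getZExtValue();
  };
  NumLines = Get(0);
  NumVars = Get(1);
  return true;
}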
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp
index f241435a0482..e2b6cfe55c16 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
@@ -72,6 +73,7 @@ class MachineCombiner : public MachineFunctionPass {
MachineTraceMetrics::Ensemble *MinInstr;
MachineBlockFrequencyInfo *MBFI;
ProfileSummaryInfo *PSI;
+ RegisterClassInfo RegClassInfo;
TargetSchedModel TSchedModel;
@@ -103,6 +105,10 @@ private:
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
MachineCombinerPattern Pattern, bool SlackIsAccurate);
+ bool reduceRegisterPressure(MachineInstr &Root, MachineBasicBlock *MBB,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ MachineCombinerPattern Pattern);
bool preservesResourceLen(MachineBasicBlock *MBB,
MachineTraceMetrics::Trace BlockTrace,
SmallVectorImpl<MachineInstr *> &InsInstrs,
@@ -257,8 +263,9 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
/// The combiner's goal may differ based on which pattern it is attempting
/// to optimize.
enum class CombinerObjective {
- MustReduceDepth, // The data dependency chain must be improved.
- Default // The critical path must not be lengthened.
+ MustReduceDepth, // The data dependency chain must be improved.
+ MustReduceRegisterPressure, // The register pressure must be reduced.
+ Default // The critical path must not be lengthened.
};
static CombinerObjective getCombinerObjective(MachineCombinerPattern P) {
@@ -272,6 +279,9 @@ static CombinerObjective getCombinerObjective(MachineCombinerPattern P) {
case MachineCombinerPattern::REASSOC_XY_AMM_BMM:
case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:
return CombinerObjective::MustReduceDepth;
+ case MachineCombinerPattern::REASSOC_XY_BCA:
+ case MachineCombinerPattern::REASSOC_XY_BAC:
+ return CombinerObjective::MustReduceRegisterPressure;
default:
return CombinerObjective::Default;
}
@@ -300,6 +310,18 @@ std::pair<unsigned, unsigned> MachineCombiner::getLatenciesForInstrSequences(
return {NewRootLatency, RootLatency};
}
+bool MachineCombiner::reduceRegisterPressure(
+ MachineInstr &Root, MachineBasicBlock *MBB,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ MachineCombinerPattern Pattern) {
+  // FIXME: For now we don't perform any checks on the register pressure
+  // patterns; we treat them as always profitable. We could do better by
+  // making the RegPressureTracker class aware of the TIE attribute; then we
+  // could compare the register pressure of DelInstrs and InsInstrs
+  // accurately.
+ return true;
+}
+
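The register-pressure objective is target-driven: a target returns true from shouldReduceRegisterPressure and then advertises pressure-reducing patterns when getMachineCombinerPatterns is called with DoRegPressureReduce set. A sketch of the opt-in; MyTargetInstrInfo and its policy are hypothetical, and the hook signatures are inferred from the call sites in this diff (in-tree const-qualification may differ):

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

using namespace llvm;

// Hypothetical target wiring for the new hooks; only the two overrides
// exercised by combineInstructions() are shown.
class MyTargetInstrInfo : public TargetInstrInfo {
public:
  bool shouldReduceRegisterPressure(MachineBasicBlock *MBB,
                                    RegisterClassInfo *RCI) const override {
    // Example policy: only chase register pressure in larger blocks.
    return MBB->size() > 32;
  }

  bool getMachineCombinerPatterns(MachineInstr &Root,
                                  SmallVectorImpl<MachineCombinerPattern> &Patterns,
                                  bool DoRegPressureReduce) const override {
    if (DoRegPressureReduce) {
      Patterns.push_back(MachineCombinerPattern::REASSOC_XY_BCA);
      Patterns.push_back(MachineCombinerPattern::REASSOC_XY_BAC);
    }
    return !Patterns.empty();
  }
};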
/// The DAGCombine code sequence ends in MI (Machine Instruction) Root.
/// The new code sequence ends in MI NewRoot. A necessary condition for the new
/// sequence to replace the old sequence is that it cannot lengthen the critical
@@ -438,6 +460,8 @@ bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize,
/// \param DelInstrs instructions to delete from \p MBB
/// \param MinInstr is a pointer to the machine trace information
/// \param RegUnits set of live registers, needed to compute instruction depths
+/// \param TII is the target instruction info, used to call a target hook
+/// \param Pattern is used to call the target hook finalizeInsInstrs
/// \param IncrementalUpdate if true, compute instruction depths incrementally,
/// otherwise invalidate the trace
static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,
@@ -445,7 +469,18 @@ static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,
SmallVector<MachineInstr *, 16> DelInstrs,
MachineTraceMetrics::Ensemble *MinInstr,
SparseSet<LiveRegUnit> &RegUnits,
+ const TargetInstrInfo *TII,
+ MachineCombinerPattern Pattern,
bool IncrementalUpdate) {
+  // If a target needs to fix up a placeholder, do it now. We defer this work
+  // because when genAlternativeCodeSequence runs we have not yet decided
+  // whether InsInstrs or DelInstrs is the better pattern, so we must not
+  // create side effects in the function yet. For example, constant pool
+  // entry creation is delayed until InsInstrs has been selected as the
+  // better pattern; otherwise the entry created for InsInstrs would not be
+  // deleted even if InsInstrs is rejected.
+ TII->finalizeInsInstrs(MI, Pattern, InsInstrs);
+
for (auto *InstrPtr : InsInstrs)
MBB->insert((MachineBasicBlock::iterator)&MI, InstrPtr);
@@ -522,6 +557,9 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
bool OptForSize = OptSize || llvm::shouldOptimizeForSize(MBB, PSI, MBFI);
+ bool DoRegPressureReduce =
+ TII->shouldReduceRegisterPressure(MBB, &RegClassInfo);
+
while (BlockIter != MBB->end()) {
auto &MI = *BlockIter++;
SmallVector<MachineCombinerPattern, 16> Patterns;
@@ -552,7 +590,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
// machine-combiner-verify-pattern-order is enabled, all patterns are
// checked to ensure later patterns do not provide better latency savings.
- if (!TII->getMachineCombinerPatterns(MI, Patterns))
+ if (!TII->getMachineCombinerPatterns(MI, Patterns, DoRegPressureReduce))
continue;
if (VerifyPatternOrder)
@@ -588,12 +626,33 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
if (ML && TII->isThroughputPattern(P))
SubstituteAlways = true;
- if (IncrementalUpdate) {
+ if (IncrementalUpdate && LastUpdate != BlockIter) {
// Update depths since the last incremental update.
MinInstr->updateDepths(LastUpdate, BlockIter, RegUnits);
LastUpdate = BlockIter;
}
+ if (DoRegPressureReduce &&
+ getCombinerObjective(P) ==
+ CombinerObjective::MustReduceRegisterPressure) {
+ if (MBB->size() > inc_threshold) {
+          // Use incremental depth updates for basic blocks above the threshold.
+ IncrementalUpdate = true;
+ LastUpdate = BlockIter;
+ }
+ if (reduceRegisterPressure(MI, MBB, InsInstrs, DelInstrs, P)) {
+ // Replace DelInstrs with InsInstrs.
+ insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
+ RegUnits, TII, P, IncrementalUpdate);
+ Changed |= true;
+
+          // Go back to the previous instruction, as it may present another
+          // ILP reassociation opportunity.
+ BlockIter--;
+ break;
+ }
+ }
+
// Substitute when we optimize for codesize and the new sequence has
// fewer instructions OR
// the new sequence neither lengthens the critical path nor increases
@@ -601,7 +660,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
if (SubstituteAlways ||
doSubstitute(NewInstCount, OldInstCount, OptForSize)) {
insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
- RegUnits, IncrementalUpdate);
+ RegUnits, TII, P, IncrementalUpdate);
// Eagerly stop after the first pattern fires.
Changed = true;
break;
@@ -624,7 +683,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
}
insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
- RegUnits, IncrementalUpdate);
+ RegUnits, TII, P, IncrementalUpdate);
// Eagerly stop after the first pattern fires.
Changed = true;
@@ -660,6 +719,7 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
nullptr;
MinInstr = nullptr;
OptSize = MF.getFunction().hasOptSize();
+ RegClassInfo.runOnMachineFunction(MF);
LLVM_DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
if (!TII->useMachineCombiner()) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index 4c4839ca6522..d8659c1c7853 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -88,18 +88,18 @@ namespace {
class CopyTracker {
struct CopyInfo {
MachineInstr *MI;
- SmallVector<unsigned, 4> DefRegs;
+ SmallVector<MCRegister, 4> DefRegs;
bool Avail;
};
- DenseMap<unsigned, CopyInfo> Copies;
+ DenseMap<MCRegister, CopyInfo> Copies;
public:
/// Mark all of the given registers and their subregisters as unavailable for
/// copying.
- void markRegsUnavailable(ArrayRef<unsigned> Regs,
+ void markRegsUnavailable(ArrayRef<MCRegister> Regs,
const TargetRegisterInfo &TRI) {
- for (unsigned Reg : Regs) {
+ for (MCRegister Reg : Regs) {
// Source of copy is no longer available for propagation.
for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) {
auto CI = Copies.find(*RUI);
@@ -110,30 +110,30 @@ public:
}
/// Remove register from copy maps.
- void invalidateRegister(unsigned Reg, const TargetRegisterInfo &TRI) {
+ void invalidateRegister(MCRegister Reg, const TargetRegisterInfo &TRI) {
// Since Reg might be a subreg of some registers, invalidating Reg alone is
// not enough. We have to find the COPY that defines Reg, or the registers
// defined by Reg, and invalidate all of them.
- SmallSet<unsigned, 8> RegsToInvalidate;
+ SmallSet<MCRegister, 8> RegsToInvalidate;
RegsToInvalidate.insert(Reg);
for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) {
auto I = Copies.find(*RUI);
if (I != Copies.end()) {
if (MachineInstr *MI = I->second.MI) {
- RegsToInvalidate.insert(MI->getOperand(0).getReg());
- RegsToInvalidate.insert(MI->getOperand(1).getReg());
+ RegsToInvalidate.insert(MI->getOperand(0).getReg().asMCReg());
+ RegsToInvalidate.insert(MI->getOperand(1).getReg().asMCReg());
}
RegsToInvalidate.insert(I->second.DefRegs.begin(),
I->second.DefRegs.end());
}
}
- for (unsigned InvalidReg : RegsToInvalidate)
+ for (MCRegister InvalidReg : RegsToInvalidate)
for (MCRegUnitIterator RUI(InvalidReg, &TRI); RUI.isValid(); ++RUI)
Copies.erase(*RUI);
}
/// Clobber a single register, removing it from the tracker's copy maps.
- void clobberRegister(unsigned Reg, const TargetRegisterInfo &TRI) {
+ void clobberRegister(MCRegister Reg, const TargetRegisterInfo &TRI) {
for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) {
auto I = Copies.find(*RUI);
if (I != Copies.end()) {
@@ -143,7 +143,7 @@ public:
// When we clobber the destination of a copy, we need to clobber the
// whole register it defined.
if (MachineInstr *MI = I->second.MI)
- markRegsUnavailable({MI->getOperand(0).getReg()}, TRI);
+ markRegsUnavailable({MI->getOperand(0).getReg().asMCReg()}, TRI);
// Now we can erase the copy.
Copies.erase(I);
}
@@ -154,8 +154,8 @@ public:
void trackCopy(MachineInstr *MI, const TargetRegisterInfo &TRI) {
assert(MI->isCopy() && "Tracking non-copy?");
- Register Def = MI->getOperand(0).getReg();
- Register Src = MI->getOperand(1).getReg();
+ MCRegister Def = MI->getOperand(0).getReg().asMCReg();
+ MCRegister Src = MI->getOperand(1).getReg().asMCReg();
// Remember Def is defined by the copy.
for (MCRegUnitIterator RUI(Def, &TRI); RUI.isValid(); ++RUI)
@@ -175,8 +175,9 @@ public:
return !Copies.empty();
}
- MachineInstr *findCopyForUnit(unsigned RegUnit, const TargetRegisterInfo &TRI,
- bool MustBeAvailable = false) {
+ MachineInstr *findCopyForUnit(MCRegister RegUnit,
+ const TargetRegisterInfo &TRI,
+ bool MustBeAvailable = false) {
auto CI = Copies.find(RegUnit);
if (CI == Copies.end())
return nullptr;
@@ -185,8 +186,8 @@ public:
return CI->second.MI;
}
- MachineInstr *findCopyDefViaUnit(unsigned RegUnit,
- const TargetRegisterInfo &TRI) {
+ MachineInstr *findCopyDefViaUnit(MCRegister RegUnit,
+ const TargetRegisterInfo &TRI) {
auto CI = Copies.find(RegUnit);
if (CI == Copies.end())
return nullptr;
@@ -196,7 +197,7 @@ public:
return findCopyForUnit(*RUI, TRI, true);
}
- MachineInstr *findAvailBackwardCopy(MachineInstr &I, unsigned Reg,
+ MachineInstr *findAvailBackwardCopy(MachineInstr &I, MCRegister Reg,
const TargetRegisterInfo &TRI) {
MCRegUnitIterator RUI(Reg, &TRI);
MachineInstr *AvailCopy = findCopyDefViaUnit(*RUI, TRI);
@@ -217,7 +218,7 @@ public:
return AvailCopy;
}
- MachineInstr *findAvailCopy(MachineInstr &DestCopy, unsigned Reg,
+ MachineInstr *findAvailCopy(MachineInstr &DestCopy, MCRegister Reg,
const TargetRegisterInfo &TRI) {
// We check the first RegUnit here, since we'll only be interested in the
// copy if it copies the entire register anyway.
@@ -274,12 +275,10 @@ public:
private:
typedef enum { DebugUse = false, RegularUse = true } DebugType;
- void ClobberRegister(unsigned Reg);
- void ReadRegister(unsigned Reg, MachineInstr &Reader,
- DebugType DT);
+ void ReadRegister(MCRegister Reg, MachineInstr &Reader, DebugType DT);
void ForwardCopyPropagateBlock(MachineBasicBlock &MBB);
void BackwardCopyPropagateBlock(MachineBasicBlock &MBB);
- bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def);
+ bool eraseIfRedundant(MachineInstr &Copy, MCRegister Src, MCRegister Def);
void forwardUses(MachineInstr &MI);
void propagateDefs(MachineInstr &MI);
bool isForwardableRegClassCopy(const MachineInstr &Copy,
@@ -288,6 +287,8 @@ private:
const MachineInstr &UseI,
unsigned UseIdx);
bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use);
+ bool hasOverlappingMultipleDef(const MachineInstr &MI,
+ const MachineOperand &MODef, Register Def);
/// Candidates for deletion.
SmallSetVector<MachineInstr *, 8> MaybeDeadCopies;
@@ -309,7 +310,7 @@ char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID;
INITIALIZE_PASS(MachineCopyPropagation, DEBUG_TYPE,
"Machine Copy Propagation Pass", false, false)
-void MachineCopyPropagation::ReadRegister(unsigned Reg, MachineInstr &Reader,
+void MachineCopyPropagation::ReadRegister(MCRegister Reg, MachineInstr &Reader,
DebugType DT) {
// If 'Reg' is defined by a copy, the copy is no longer a candidate
// for elimination. If a copy is "read" by a debug user, record the user
@@ -332,10 +333,10 @@ void MachineCopyPropagation::ReadRegister(unsigned Reg, MachineInstr &Reader,
/// PreviousCopy. e.g.
/// isNopCopy("ecx = COPY eax", AX, CX) == true
/// isNopCopy("ecx = COPY eax", AH, CL) == false
-static bool isNopCopy(const MachineInstr &PreviousCopy, unsigned Src,
- unsigned Def, const TargetRegisterInfo *TRI) {
- Register PreviousSrc = PreviousCopy.getOperand(1).getReg();
- Register PreviousDef = PreviousCopy.getOperand(0).getReg();
+static bool isNopCopy(const MachineInstr &PreviousCopy, MCRegister Src,
+ MCRegister Def, const TargetRegisterInfo *TRI) {
+ MCRegister PreviousSrc = PreviousCopy.getOperand(1).getReg().asMCReg();
+ MCRegister PreviousDef = PreviousCopy.getOperand(0).getReg().asMCReg();
if (Src == PreviousSrc && Def == PreviousDef)
return true;
if (!TRI->isSubRegister(PreviousSrc, Src))
@@ -347,8 +348,8 @@ static bool isNopCopy(const MachineInstr &PreviousCopy, unsigned Src,
/// Remove instruction \p Copy if there exists a previous copy that copies the
/// register \p Src to the register \p Def; this may happen indirectly through
/// copies of the super registers.
-bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src,
- unsigned Def) {
+bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy,
+ MCRegister Src, MCRegister Def) {
// Avoid eliminating a copy from/to a reserved register as we cannot predict
// the value (Example: The sparc zero register is writable but stays zero).
if (MRI->isReserved(Src) || MRI->isReserved(Def))
@@ -459,6 +460,21 @@ bool MachineCopyPropagation::hasImplicitOverlap(const MachineInstr &MI,
return false;
}
+/// For an MI that has multiple definitions, check whether \p MI has
+/// a definition that overlaps with another of its definitions.
+/// For example, on ARM: umull r9, r9, lr, r0
+/// The umull instruction is unpredictable unless RdHi and RdLo are different.
+bool MachineCopyPropagation::hasOverlappingMultipleDef(
+ const MachineInstr &MI, const MachineOperand &MODef, Register Def) {
+ for (const MachineOperand &MIDef : MI.defs()) {
+ if ((&MIDef != &MODef) && MIDef.isReg() &&
+ TRI->regsOverlap(Def, MIDef.getReg()))
+ return true;
+ }
+
+ return false;
+}
+
/// Look for available copies whose destination register is used by \p MI and
/// replace the use in \p MI with the copy's source register.
void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
@@ -489,7 +505,8 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
if (!MOUse.isRenamable())
continue;
- MachineInstr *Copy = Tracker.findAvailCopy(MI, MOUse.getReg(), *TRI);
+ MachineInstr *Copy =
+ Tracker.findAvailCopy(MI, MOUse.getReg().asMCReg(), *TRI);
if (!Copy)
continue;
@@ -561,13 +578,13 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
// Analyze copies (which don't overlap themselves).
if (MI->isCopy() && !TRI->regsOverlap(MI->getOperand(0).getReg(),
MI->getOperand(1).getReg())) {
- Register Def = MI->getOperand(0).getReg();
- Register Src = MI->getOperand(1).getReg();
-
- assert(!Register::isVirtualRegister(Def) &&
- !Register::isVirtualRegister(Src) &&
+ assert(MI->getOperand(0).getReg().isPhysical() &&
+ MI->getOperand(1).getReg().isPhysical() &&
"MachineCopyPropagation should be run after register allocation!");
+ MCRegister Def = MI->getOperand(0).getReg().asMCReg();
+ MCRegister Src = MI->getOperand(1).getReg().asMCReg();
+
// The two copies cancel out and the source of the first copy
// hasn't been overridden, eliminate the second one. e.g.
// %ecx = COPY %eax
@@ -589,7 +606,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
forwardUses(*MI);
// Src may have been changed by forwardUses()
- Src = MI->getOperand(1).getReg();
+ Src = MI->getOperand(1).getReg().asMCReg();
// If Src is defined by a previous copy, the previous copy cannot be
// eliminated.
@@ -597,7 +614,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
for (const MachineOperand &MO : MI->implicit_operands()) {
if (!MO.isReg() || !MO.readsReg())
continue;
- Register Reg = MO.getReg();
+ MCRegister Reg = MO.getReg().asMCReg();
if (!Reg)
continue;
ReadRegister(Reg, *MI, RegularUse);
@@ -620,7 +637,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
for (const MachineOperand &MO : MI->implicit_operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
- Register Reg = MO.getReg();
+ MCRegister Reg = MO.getReg().asMCReg();
if (!Reg)
continue;
Tracker.clobberRegister(Reg, *TRI);
@@ -634,7 +651,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
// Clobber any earlyclobber regs first.
for (const MachineOperand &MO : MI->operands())
if (MO.isReg() && MO.isEarlyClobber()) {
- Register Reg = MO.getReg();
+ MCRegister Reg = MO.getReg().asMCReg();
// If we have a tied earlyclobber, that means it is also read by this
// instruction, so we need to make sure we don't remove it as dead
// later.
@@ -646,7 +663,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
forwardUses(*MI);
// Not a copy.
- SmallVector<unsigned, 2> Defs;
+ SmallVector<Register, 2> Defs;
const MachineOperand *RegMask = nullptr;
for (const MachineOperand &MO : MI->operands()) {
if (MO.isRegMask())
@@ -657,14 +674,14 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
if (!Reg)
continue;
- assert(!Register::isVirtualRegister(Reg) &&
+ assert(!Reg.isVirtual() &&
"MachineCopyPropagation should be run after register allocation!");
if (MO.isDef() && !MO.isEarlyClobber()) {
- Defs.push_back(Reg);
+ Defs.push_back(Reg.asMCReg());
continue;
} else if (MO.readsReg())
- ReadRegister(Reg, *MI, MO.isDebug() ? DebugUse : RegularUse);
+ ReadRegister(Reg.asMCReg(), *MI, MO.isDebug() ? DebugUse : RegularUse);
}
// The instruction has a register mask operand which means that it clobbers
@@ -676,7 +693,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
MaybeDeadCopies.begin();
DI != MaybeDeadCopies.end();) {
MachineInstr *MaybeDead = *DI;
- Register Reg = MaybeDead->getOperand(0).getReg();
+ MCRegister Reg = MaybeDead->getOperand(0).getReg().asMCReg();
assert(!MRI->isReserved(Reg));
if (!RegMask->clobbersPhysReg(Reg)) {
@@ -701,7 +718,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
}
// Any previous copy defining or reading one of the Defs is no longer available.
- for (unsigned Reg : Defs)
+ for (MCRegister Reg : Defs)
Tracker.clobberRegister(Reg, *TRI);
}
@@ -716,7 +733,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
// Update matching debug values, if any.
assert(MaybeDead->isCopy());
- unsigned SrcReg = MaybeDead->getOperand(1).getReg();
+ Register SrcReg = MaybeDead->getOperand(1).getReg();
MRI->updateDbgUsersToReg(SrcReg, CopyDbgUsers[MaybeDead]);
MaybeDead->eraseFromParent();
@@ -768,7 +785,7 @@ void MachineCopyPropagation::propagateDefs(MachineInstr &MI) {
continue;
MachineInstr *Copy =
- Tracker.findAvailBackwardCopy(MI, MODef.getReg(), *TRI);
+ Tracker.findAvailBackwardCopy(MI, MODef.getReg().asMCReg(), *TRI);
if (!Copy)
continue;
@@ -784,6 +801,9 @@ void MachineCopyPropagation::propagateDefs(MachineInstr &MI) {
if (hasImplicitOverlap(MI, MODef))
continue;
+ if (hasOverlappingMultipleDef(MI, MODef, Def))
+ continue;
+
LLVM_DEBUG(dbgs() << "MCP: Replacing " << printReg(MODef.getReg(), TRI)
<< "\n with " << printReg(Def, TRI) << "\n in "
<< MI << " from " << *Copy);
@@ -813,8 +833,8 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
!TRI->regsOverlap(MI->getOperand(0).getReg(),
MI->getOperand(1).getReg())) {
- Register Def = MI->getOperand(0).getReg();
- Register Src = MI->getOperand(1).getReg();
+ MCRegister Def = MI->getOperand(0).getReg().asMCReg();
+ MCRegister Src = MI->getOperand(1).getReg().asMCReg();
// Unlike forward cp, we don't invoke propagateDefs here,
// just let forward cp do COPY-to-COPY propagation.
@@ -829,7 +849,7 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
// Invalidate any earlyclobber regs first.
for (const MachineOperand &MO : MI->operands())
if (MO.isReg() && MO.isEarlyClobber()) {
- Register Reg = MO.getReg();
+ MCRegister Reg = MO.getReg().asMCReg();
if (!Reg)
continue;
Tracker.invalidateRegister(Reg, *TRI);
@@ -844,10 +864,10 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
continue;
if (MO.isDef())
- Tracker.invalidateRegister(MO.getReg(), *TRI);
+ Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI);
if (MO.readsReg())
- Tracker.invalidateRegister(MO.getReg(), *TRI);
+ Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI);
}
}
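CopyTracker above indexes every tracked COPY under each register unit its operands cover, so a clobber of any aliasing register (including a sub-register) invalidates the whole copy. A standalone toy model of that indexing scheme, with plain integer unit IDs standing in for MCRegUnitIterator:

#include <map>
#include <vector>

// Toy CopyTracker: a copy is recorded under every register unit it touches;
// invalidating one unit removes the copy no matter which alias was written.
struct ToyCopy { int Def, Src; };

struct ToyTracker {
  std::map<int, ToyCopy> Copies; // register unit -> tracked copy

  void track(const ToyCopy &C, const std::vector<int> &Units) {
    for (int U : Units)
      Copies[U] = C;
  }
  void clobber(const std::vector<int> &Units) {
    for (int U : Units)
      Copies.erase(U);
  }
  const ToyCopy *find(int Unit) const {
    auto It = Copies.find(Unit);
    return It == Copies.end() ? nullptr : &It->second;
  }
};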
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp
index bf57ec0e8c28..599a81847592 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -89,10 +90,11 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI,
// Do this by introducing debug uses of each register definition. If that is
// not possible (e.g. we have a phi or a meta instruction), emit a constant.
uint64_t NextImm = 0;
+ SmallSet<DILocalVariable *, 16> VarSet;
const MCInstrDesc &DbgValDesc = TII.get(TargetOpcode::DBG_VALUE);
for (MachineBasicBlock &MBB : MF) {
MachineBasicBlock::iterator FirstNonPHIIt = MBB.getFirstNonPHI();
- for (auto I = MBB.begin(), E = MBB.end(); I != E; ) {
+ for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
MachineInstr &MI = *I;
++I;
@@ -113,6 +115,7 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI,
Line = EarliestDVI->getDebugLoc().getLine();
DILocalVariable *LocalVar = Line2Var[Line];
assert(LocalVar && "No variable for current line?");
+ VarSet.insert(LocalVar);
// Emit DBG_VALUEs for register definitions.
SmallVector<MachineOperand *, 4> RegDefs;
@@ -132,6 +135,33 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI,
}
}
+  // Save the number of lines and variables into "llvm.mir.debugify" so that
+  // mir-check-debugify can consume them later.
+ NamedMDNode *NMD = M.getNamedMetadata("llvm.mir.debugify");
+ IntegerType *Int32Ty = Type::getInt32Ty(Ctx);
+ if (!NMD) {
+ NMD = M.getOrInsertNamedMetadata("llvm.mir.debugify");
+ auto addDebugifyOperand = [&](unsigned N) {
+ NMD->addOperand(MDNode::get(
+ Ctx, ValueAsMetadata::getConstant(ConstantInt::get(Int32Ty, N))));
+ };
+ // Add number of lines.
+ addDebugifyOperand(NextLine - 1);
+ // Add number of variables.
+ addDebugifyOperand(VarSet.size());
+ } else {
+ assert(NMD->getNumOperands() == 2 &&
+ "llvm.mir.debugify should have exactly 2 operands!");
+ auto setDebugifyOperand = [&](unsigned Idx, unsigned N) {
+ NMD->setOperand(Idx, MDNode::get(Ctx, ValueAsMetadata::getConstant(
+ ConstantInt::get(Int32Ty, N))));
+ };
+ // Set number of lines.
+ setDebugifyOperand(0, NextLine - 1);
+ // Set number of variables.
+ setDebugifyOperand(1, VarSet.size());
+ }
+
return true;
}
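The writer side of this metadata handshake can be distilled into a helper; the sketch below follows the add/set paths of the hunk above exactly and assumes nothing beyond the IR headers:

#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// Record NumLines and NumVars as the two operands of "llvm.mir.debugify",
// creating the node on first use and overwriting it on later runs.
static void writeDebugifyCounts(Module &M, unsigned NumLines,
                                unsigned NumVars) {
  LLVMContext &Ctx = M.getContext();
  IntegerType *Int32Ty = Type::getInt32Ty(Ctx);
  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.mir.debugify");
  auto MakeNode = [&](unsigned N) {
    return MDNode::get(
        Ctx, ValueAsMetadata::getConstant(ConstantInt::get(Int32Ty, N)));
  };
  if (NMD->getNumOperands() == 2) {
    NMD->setOperand(0, MakeNode(NumLines));
    NMD->setOperand(1, MakeNode(NumVars));
  } else {
    NMD->addOperand(MakeNode(NumLines));
    NMD->addOperand(MakeNode(NumVars));
  }
}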
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
index 6d45f08804ed..3f44578b1a2c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
@@ -273,20 +273,7 @@ getOrCreateJumpTableInfo(unsigned EntryKind) {
}
DenormalMode MachineFunction::getDenormalMode(const fltSemantics &FPType) const {
- if (&FPType == &APFloat::IEEEsingle()) {
- Attribute Attr = F.getFnAttribute("denormal-fp-math-f32");
- StringRef Val = Attr.getValueAsString();
- if (!Val.empty())
- return parseDenormalFPAttribute(Val);
-
- // If the f32 variant of the attribute isn't specified, try to use the
- // generic one.
- }
-
- // TODO: Should probably avoid the connection to the IR and store directly
- // in the MachineFunction.
- Attribute Attr = F.getFnAttribute("denormal-fp-math");
- return parseDenormalFPAttribute(Attr.getValueAsString());
+ return F.getDenormalMode(FPType);
}
/// Should we be emitting segmented stack stuff for the function
@@ -341,33 +328,6 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
MBBNumbering.resize(BlockNo);
}
-/// This is used with -fbasic-block-sections or -fbasicblock-labels option.
-/// A unary encoding of basic block labels is done to keep ".strtab" sizes
-/// small.
-void MachineFunction::createBBLabels() {
- const TargetInstrInfo *TII = getSubtarget().getInstrInfo();
- this->BBSectionsSymbolPrefix.resize(getNumBlockIDs(), 'a');
- for (auto MBBI = begin(), E = end(); MBBI != E; ++MBBI) {
- assert(
- (MBBI->getNumber() >= 0 && MBBI->getNumber() < (int)getNumBlockIDs()) &&
- "BasicBlock number was out of range!");
- // 'a' - Normal block.
- // 'r' - Return block.
- // 'l' - Landing Pad.
- // 'L' - Return and landing pad.
- bool isEHPad = MBBI->isEHPad();
- bool isRetBlock = MBBI->isReturnBlock() && !TII->isTailCall(MBBI->back());
- char type = 'a';
- if (isEHPad && isRetBlock)
- type = 'L';
- else if (isEHPad)
- type = 'l';
- else if (isRetBlock)
- type = 'r';
- BBSectionsSymbolPrefix[MBBI->getNumber()] = type;
- }
-}
-
/// This method iterates over the basic blocks and assigns their IsBeginSection
/// and IsEndSection fields. This must be called after MBB layout is finalized
/// and the SectionID's are assigned to MBBs.
@@ -387,9 +347,9 @@ void MachineFunction::assignBeginEndSections() {
/// Allocate a new MachineInstr. Use this instead of `new MachineInstr'.
MachineInstr *MachineFunction::CreateMachineInstr(const MCInstrDesc &MCID,
const DebugLoc &DL,
- bool NoImp) {
+ bool NoImplicit) {
return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
- MachineInstr(*this, MCID, DL, NoImp);
+ MachineInstr(*this, MCID, DL, NoImplicit);
}
/// Create a new MachineInstr which is a copy of the 'Orig' instruction,
@@ -460,6 +420,9 @@ MachineFunction::CreateMachineBasicBlock(const BasicBlock *bb) {
void
MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) {
assert(MBB->getParent() == this && "MBB parent mismatch!");
+ // Clean up any references to MBB in jump tables before deleting it.
+ if (JumpTableInfo)
+ JumpTableInfo->RemoveMBBFromJumpTables(MBB);
MBB->~MachineBasicBlock();
BasicBlockRecycler.Deallocate(Allocator, MBB);
}
@@ -474,6 +437,13 @@ MachineMemOperand *MachineFunction::getMachineMemOperand(
SSID, Ordering, FailureOrdering);
}
+MachineMemOperand *MachineFunction::getMachineMemOperand(
+ const MachineMemOperand *MMO, MachinePointerInfo &PtrInfo, uint64_t Size) {
+ return new (Allocator) MachineMemOperand(
+ PtrInfo, MMO->getFlags(), Size, MMO->getBaseAlign(), AAMDNodes(), nullptr,
+ MMO->getSyncScopeID(), MMO->getOrdering(), MMO->getFailureOrdering());
+}
+
MachineMemOperand *
MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
int64_t Offset, uint64_t Size) {
@@ -485,9 +455,11 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
? commonAlignment(MMO->getBaseAlign(), Offset)
: MMO->getBaseAlign();
+ // Do not preserve ranges, since we don't necessarily know what the high bits
+ // are anymore.
return new (Allocator)
MachineMemOperand(PtrInfo.getWithOffset(Offset), MMO->getFlags(), Size,
- Alignment, AAMDNodes(), nullptr, MMO->getSyncScopeID(),
+ Alignment, MMO->getAAInfo(), nullptr, MMO->getSyncScopeID(),
MMO->getOrdering(), MMO->getFailureOrdering());
}
@@ -896,7 +868,7 @@ try_next:;
// Add the new filter.
int FilterID = -(1 + FilterIds.size());
FilterIds.reserve(FilterIds.size() + TyIds.size() + 1);
- FilterIds.insert(FilterIds.end(), TyIds.begin(), TyIds.end());
+ llvm::append_range(FilterIds, TyIds);
FilterEnds.push_back(FilterIds.size());
FilterIds.push_back(0); // terminator
return FilterID;
@@ -974,6 +946,46 @@ void MachineFunction::moveCallSiteInfo(const MachineInstr *Old,
CallSitesInfo[New] = CSInfo;
}
+void MachineFunction::setDebugInstrNumberingCount(unsigned Num) {
+ DebugInstrNumberingCount = Num;
+}
+
+void MachineFunction::makeDebugValueSubstitution(DebugInstrOperandPair A,
+ DebugInstrOperandPair B) {
+ auto Result = DebugValueSubstitutions.insert(std::make_pair(A, B));
+ (void)Result;
+ assert(Result.second && "Substitution for an already substituted value?");
+}
+
+void MachineFunction::substituteDebugValuesForInst(const MachineInstr &Old,
+ MachineInstr &New,
+ unsigned MaxOperand) {
+ // If the Old instruction wasn't tracked at all, there is no work to do.
+ unsigned OldInstrNum = Old.peekDebugInstrNum();
+ if (!OldInstrNum)
+ return;
+
+ // Iterate over all operands looking for defs to create substitutions for.
+ // Avoid creating new instr numbers unless we create a new substitution.
+ // While this has no functional effect, it risks confusing someone reading
+ // MIR output.
+ // Examine all the operands, or the first N specified by the caller.
+ MaxOperand = std::min(MaxOperand, Old.getNumOperands());
+  for (unsigned I = 0; I < MaxOperand; ++I) {
+ const auto &OldMO = Old.getOperand(I);
+ auto &NewMO = New.getOperand(I);
+ (void)NewMO;
+
+ if (!OldMO.isReg() || !OldMO.isDef())
+ continue;
+ assert(NewMO.isDef());
+
+ unsigned NewInstrNum = New.getDebugInstrNum();
+ makeDebugValueSubstitution(std::make_pair(OldInstrNum, I),
+ std::make_pair(NewInstrNum, I));
+ }
+}
+
/// \}
//===----------------------------------------------------------------------===//
@@ -1038,6 +1050,17 @@ bool MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old,
return MadeChange;
}
+/// If MBB is present in any jump tables, remove it.
+bool MachineJumpTableInfo::RemoveMBBFromJumpTables(MachineBasicBlock *MBB) {
+ bool MadeChange = false;
+ for (MachineJumpTableEntry &JTE : JumpTables) {
+ auto removeBeginItr = std::remove(JTE.MBBs.begin(), JTE.MBBs.end(), MBB);
+ MadeChange |= (removeBeginItr != JTE.MBBs.end());
+ JTE.MBBs.erase(removeBeginItr, JTE.MBBs.end());
+ }
+ return MadeChange;
+}
+
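RemoveMBBFromJumpTables above applies the erase-remove idiom once per jump-table entry. The same shape in a generic standalone form:

#include <algorithm>
#include <vector>

// Remove every occurrence of Value from V and report whether anything
// changed, matching the per-entry logic of RemoveMBBFromJumpTables.
template <typename T> bool eraseAll(std::vector<T> &V, const T &Value) {
  auto NewEnd = std::remove(V.begin(), V.end(), Value);
  bool Changed = NewEnd != V.end();
  V.erase(NewEnd, V.end());
  return Changed;
}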
/// If Old is a target of the jump tables, update the jump table to branch to
/// New instead.
bool MachineJumpTableInfo::ReplaceMBBInJumpTable(unsigned Idx,
@@ -1084,10 +1107,14 @@ Printable llvm::printJumpTableEntryReference(unsigned Idx) {
void MachineConstantPoolValue::anchor() {}
-Type *MachineConstantPoolEntry::getType() const {
+unsigned MachineConstantPoolValue::getSizeInBytes(const DataLayout &DL) const {
+ return DL.getTypeAllocSize(Ty);
+}
+
+unsigned MachineConstantPoolEntry::getSizeInBytes(const DataLayout &DL) const {
if (isMachineConstantPoolEntry())
- return Val.MachineCPVal->getType();
- return Val.ConstVal->getType();
+ return Val.MachineCPVal->getSizeInBytes(DL);
+ return DL.getTypeAllocSize(Val.ConstVal->getType());
}
bool MachineConstantPoolEntry::needsRelocation() const {
@@ -1100,7 +1127,7 @@ SectionKind
MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const {
if (needsRelocation())
return SectionKind::getReadOnlyWithRel();
- switch (DL->getTypeAllocSize(getType())) {
+ switch (getSizeInBytes(*DL)) {
case 4:
return SectionKind::getMergeableConst4();
case 8:
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp
index 03149aa7db4a..16cde1f601f9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/DominanceFrontier.h"
#include "llvm/Analysis/GlobalsModRef.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
index 3645a4e3466b..c31c065b1976 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -14,7 +14,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SlotIndexes.h"
-#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/PrintPasses.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -44,7 +44,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass {
}
bool runOnMachineFunction(MachineFunction &MF) override {
- if (!llvm::isFunctionInPrintList(MF.getName()))
+ if (!isFunctionInPrintList(MF.getName()))
return false;
OS << "# " << Banner << ":\n";
MF.print(OS, getAnalysisIfAvailable<SlotIndexes>());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
new file mode 100644
index 000000000000..483809a8ed96
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
@@ -0,0 +1,155 @@
+//===-- MachineFunctionSplitter.cpp - Split machine functions -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// Uses profile information to split out cold blocks.
+//
+// This pass splits out cold machine basic blocks from the parent function. This
+// implementation leverages the basic block section framework. Blocks marked
+// cold by this pass are grouped together in a separate section prefixed with
+// ".text.unlikely.*". The linker can then group these together as a cold
+// section. The split part of the function is a contiguous region identified by
+// the symbol "foo.cold". Grouping all cold blocks across functions together
+// decreases fragmentation and improves icache and itlb utilization. Note that
+// the overall changes to the binary size are negligible; only a small number of
+// additional jump instructions may be introduced.
+//
+// For the original RFC of this pass please see
+// https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/BasicBlockSectionUtils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+// FIXME: This cutoff value is CPU dependent and should be moved to
+// TargetTransformInfo once we consider enabling this on other platforms.
+// The value is expressed as a ProfileSummaryInfo integer percentile cutoff.
+// Defaults to 999950, i.e. all blocks colder than 99.995 percentile are split.
+// The default was empirically determined to be optimal when considering cutoff
+// values from 99%-ile to 100%-ile with respect to iTLB and icache metrics on
+// Intel CPUs.
+static cl::opt<unsigned>
+ PercentileCutoff("mfs-psi-cutoff",
+ cl::desc("Percentile profile summary cutoff used to "
+ "determine cold blocks. Unused if set to zero."),
+ cl::init(999950), cl::Hidden);
+
+static cl::opt<unsigned> ColdCountThreshold(
+ "mfs-count-threshold",
+ cl::desc(
+ "Minimum number of times a block must be executed to be retained."),
+ cl::init(1), cl::Hidden);
+
+namespace {
+
+class MachineFunctionSplitter : public MachineFunctionPass {
+public:
+ static char ID;
+ MachineFunctionSplitter() : MachineFunctionPass(ID) {
+ initializeMachineFunctionSplitterPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override {
+ return "Machine Function Splitter Transformation";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+};
+} // end anonymous namespace
+
+static bool isColdBlock(MachineBasicBlock &MBB,
+ const MachineBlockFrequencyInfo *MBFI,
+ ProfileSummaryInfo *PSI) {
+ Optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
+ if (!Count.hasValue())
+ return true;
+
+ if (PercentileCutoff > 0) {
+ return PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
+ }
+ return (*Count < ColdCountThreshold);
+}
+
+bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
+ // TODO: We only target functions with profile data. Static information may
+ // also be considered but we don't see performance improvements yet.
+ if (!MF.getFunction().hasProfileData())
+ return false;
+
+ // TODO: We don't split functions where a section attribute has been set
+ // since the split part may not be placed in a contiguous region. It may also
+ // be more beneficial to augment the linker to ensure contiguous layout of
+ // split functions within the same section as specified by the attribute.
+ if (!MF.getFunction().getSection().empty())
+ return false;
+
+ // We don't want to proceed further for cold functions
+ // or functions of unknown hotness. Lukewarm functions have no prefix.
+ Optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix();
+ if (SectionPrefix.hasValue() &&
+ (SectionPrefix.getValue().equals("unlikely") ||
+ SectionPrefix.getValue().equals("unknown"))) {
+ return false;
+ }
+
+ // Renumbering blocks here preserves the order of the blocks as
+ // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort
+ // blocks. Preserving the order of blocks is essential to retaining decisions
+ // made by prior passes such as MachineBlockPlacement.
+ MF.RenumberBlocks();
+ MF.setBBSectionsType(BasicBlockSection::Preset);
+ auto *MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+
+ for (auto &MBB : MF) {
+ // FIXME: We retain the entry block and conservatively keep all landing pad
+ // blocks as part of the original function. Once D73739 is submitted, we can
+ // improve the handling of ehpads.
+ if ((MBB.pred_empty() || MBB.isEHPad()))
+ continue;
+ if (isColdBlock(MBB, MBFI, PSI))
+ MBB.setSectionID(MBBSectionID::ColdSectionID);
+ }
+
+ auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
+ return X.getSectionID().Type < Y.getSectionID().Type;
+ };
+ llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator);
+
+ return true;
+}
+
+void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineModuleInfoWrapperPass>();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+}
+
+char MachineFunctionSplitter::ID = 0;
+INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter",
+ "Split machine functions using profile information", false,
+ false)
+
+MachineFunctionPass *llvm::createMachineFunctionSplitterPass() {
+ return new MachineFunctionSplitter();
+}
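isColdBlock above combines three cases: a block with no profile count is cold; otherwise either the PSI percentile cutoff or the raw execution-count threshold decides. A distilled standalone version of that decision; the percentile query is stubbed out here, whereas the pass calls PSI->isColdCountNthPercentile:

#include <cstdint>
#include <optional>

// Placeholder for ProfileSummaryInfo::isColdCountNthPercentile; the real
// query compares Count against the profile summary at the given cutoff.
static bool coldAtPercentile(unsigned Cutoff, uint64_t Count) {
  return Count == 0; // stub for the sketch
}

static bool isColdBlockSketch(std::optional<uint64_t> Count,
                              unsigned PercentileCutoff,
                              uint64_t ColdCountThreshold) {
  if (!Count)
    return true; // no profile data for this block: treat it as cold
  if (PercentileCutoff > 0)
    return coldAtPercentile(PercentileCutoff, *Count);
  return *Count < ColdCountThreshold;
}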
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
index d4181591deab..59d98054e3a2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
@@ -34,6 +34,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -116,7 +117,7 @@ void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) {
/// the MCInstrDesc.
MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
DebugLoc dl, bool NoImp)
- : MCID(&tid), debugLoc(std::move(dl)) {
+ : MCID(&tid), debugLoc(std::move(dl)), DebugInstrNum(0) {
assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
// Reserve space for the expected number of operands.
@@ -130,10 +131,12 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
addImplicitDefUseOperands(MF);
}
-/// MachineInstr ctor - Copies MachineInstr arg exactly
-///
+/// MachineInstr ctor - Copies MachineInstr arg exactly.
+/// Does not copy the number from debug instruction numbering, to preserve
+/// uniqueness.
MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
- : MCID(&MI.getDesc()), Info(MI.Info), debugLoc(MI.getDebugLoc()) {
+ : MCID(&MI.getDesc()), Info(MI.Info), debugLoc(MI.getDebugLoc()),
+ DebugInstrNum(0) {
assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
CapOperands = OperandCapacity::get(MI.getNumOperands());
@@ -147,6 +150,10 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
setFlags(MI.Flags);
}
+void MachineInstr::moveBefore(MachineInstr *MovePos) {
+ MovePos->getParent()->splice(MovePos, getParent(), getIterator());
+}
+
/// getRegInfo - If this instruction is embedded into a MachineFunction,
/// return the MachineRegisterInfo object for the current function, otherwise
/// return null.
@@ -701,11 +708,10 @@ bool MachineInstr::isCandidateForCallSiteEntry(QueryType Type) const {
if (!isCall(Type))
return false;
switch (getOpcode()) {
- case TargetOpcode::PATCHABLE_EVENT_CALL:
- case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
case TargetOpcode::PATCHPOINT:
case TargetOpcode::STACKMAP:
case TargetOpcode::STATEPOINT:
+ case TargetOpcode::FENTRY_CALL:
return false;
}
return true;
@@ -835,27 +841,27 @@ const DILabel *MachineInstr::getDebugLabel() const {
}
const MachineOperand &MachineInstr::getDebugVariableOp() const {
- assert(isDebugValue() && "not a DBG_VALUE");
+ assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE");
return getOperand(2);
}
MachineOperand &MachineInstr::getDebugVariableOp() {
- assert(isDebugValue() && "not a DBG_VALUE");
+ assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE");
return getOperand(2);
}
const DILocalVariable *MachineInstr::getDebugVariable() const {
- assert(isDebugValue() && "not a DBG_VALUE");
+ assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE");
return cast<DILocalVariable>(getOperand(2).getMetadata());
}
MachineOperand &MachineInstr::getDebugExpressionOp() {
- assert(isDebugValue() && "not a DBG_VALUE");
+ assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE");
return getOperand(3);
}
const DIExpression *MachineInstr::getDebugExpression() const {
- assert(isDebugValue() && "not a DBG_VALUE");
+ assert((isDebugValue() || isDebugRef()) && "not a DBG_VALUE");
return cast<DIExpression>(getOperand(3).getMetadata());
}
@@ -1094,10 +1100,12 @@ void MachineInstr::tieOperands(unsigned DefIdx, unsigned UseIdx) {
if (DefIdx < TiedMax)
UseMO.TiedTo = DefIdx + 1;
else {
- // Inline asm can use the group descriptors to find tied operands, but on
- // normal instruction, the tied def must be within the first TiedMax
+ // Inline asm can use the group descriptors to find tied operands,
+ // statepoint tied operands are trivial to match (1-1 reg def with reg use),
+ // but on normal instruction, the tied def must be within the first TiedMax
// operands.
- assert(isInlineAsm() && "DefIdx out of range");
+ assert((isInlineAsm() || getOpcode() == TargetOpcode::STATEPOINT) &&
+ "DefIdx out of range");
UseMO.TiedTo = TiedMax;
}
@@ -1117,7 +1125,7 @@ unsigned MachineInstr::findTiedOperandIdx(unsigned OpIdx) const {
return MO.TiedTo - 1;
// Uses on normal instructions can be out of range.
- if (!isInlineAsm()) {
+ if (!isInlineAsm() && getOpcode() != TargetOpcode::STATEPOINT) {
// Normal tied defs must be in the 0..TiedMax-1 range.
if (MO.isUse())
return TiedMax - 1;
@@ -1130,6 +1138,25 @@ unsigned MachineInstr::findTiedOperandIdx(unsigned OpIdx) const {
llvm_unreachable("Can't find tied use");
}
+ if (getOpcode() == TargetOpcode::STATEPOINT) {
+ // In STATEPOINT defs correspond 1-1 to GC pointer operands passed
+ // on registers.
+ StatepointOpers SO(this);
+ unsigned CurUseIdx = SO.getFirstGCPtrIdx();
+ assert(CurUseIdx != -1U && "only gc pointer statepoint operands can be tied");
+ unsigned NumDefs = getNumDefs();
+ for (unsigned CurDefIdx = 0; CurDefIdx < NumDefs; ++CurDefIdx) {
+ while (!getOperand(CurUseIdx).isReg())
+ CurUseIdx = StackMaps::getNextMetaArgIdx(this, CurUseIdx);
+ if (OpIdx == CurDefIdx)
+ return CurUseIdx;
+ if (OpIdx == CurUseIdx)
+ return CurDefIdx;
+ CurUseIdx = StackMaps::getNextMetaArgIdx(this, CurUseIdx);
+ }
+ llvm_unreachable("Can't find tied use");
+ }
+
// Now deal with inline asm by parsing the operand group descriptor flags.
// Find the beginning of each operand group.
SmallVector<unsigned, 8> GroupIdx;
@@ -1213,7 +1240,7 @@ bool MachineInstr::isSafeToMove(AAResults *AA, bool &SawStore) const {
// See if this instruction does a load. If so, we have to guarantee that the
// loaded value doesn't change between the load and its intended
- // destination. The check for isInvariantLoad gives the targe the chance to
+ // destination. The check for isInvariantLoad gives the target the chance to
// classify the load as always returning a constant, e.g. a constant pool
// load.
if (mayLoad() && !isDereferenceableInvariantLoad(AA))
@@ -1224,47 +1251,21 @@ bool MachineInstr::isSafeToMove(AAResults *AA, bool &SawStore) const {
return true;
}
-bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other,
- bool UseTBAA) const {
- const MachineFunction *MF = getMF();
- const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
- const MachineFrameInfo &MFI = MF->getFrameInfo();
-
- // If neither instruction stores to memory, they can't alias in any
- // meaningful way, even if they read from the same address.
- if (!mayStore() && !Other.mayStore())
- return false;
-
- // Both instructions must be memory operations to be able to alias.
- if (!mayLoadOrStore() || !Other.mayLoadOrStore())
- return false;
-
- // Let the target decide if memory accesses cannot possibly overlap.
- if (TII->areMemAccessesTriviallyDisjoint(*this, Other))
- return false;
-
- // FIXME: Need to handle multiple memory operands to support all targets.
- if (!hasOneMemOperand() || !Other.hasOneMemOperand())
- return true;
-
- MachineMemOperand *MMOa = *memoperands_begin();
- MachineMemOperand *MMOb = *Other.memoperands_begin();
-
- // The following interface to AA is fashioned after DAGCombiner::isAlias
- // and operates with MachineMemOperand offset with some important
- // assumptions:
+static bool MemOperandsHaveAlias(const MachineFrameInfo &MFI, AAResults *AA,
+ bool UseTBAA, const MachineMemOperand *MMOa,
+ const MachineMemOperand *MMOb) {
+ // The following interface to AA is fashioned after DAGCombiner::isAlias and
+ // operates with MachineMemOperand offset with some important assumptions:
// - LLVM fundamentally assumes flat address spaces.
- // - MachineOperand offset can *only* result from legalization and
- // cannot affect queries other than the trivial case of overlap
- // checking.
- // - These offsets never wrap and never step outside
- // of allocated objects.
+ // - MachineOperand offset can *only* result from legalization and cannot
+ // affect queries other than the trivial case of overlap checking.
+ // - These offsets never wrap and never step outside of allocated objects.
// - There should never be any negative offsets here.
//
// FIXME: Modify API to hide this math from "user"
- // Even before we go to AA we can reason locally about some
- // memory objects. It can save compile time, and possibly catch some
- // corner cases not currently covered.
+ // Even before we go to AA we can reason locally about some memory objects. It
+ // can save compile time, and possibly catch some corner cases not currently
+ // covered.
int64_t OffsetA = MMOa->getOffset();
int64_t OffsetB = MMOb->getOffset();
@@ -1306,20 +1307,63 @@ bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other,
assert((OffsetA >= 0) && "Negative MachineMemOperand offset");
assert((OffsetB >= 0) && "Negative MachineMemOperand offset");
- int64_t OverlapA = KnownWidthA ? WidthA + OffsetA - MinOffset
- : MemoryLocation::UnknownSize;
- int64_t OverlapB = KnownWidthB ? WidthB + OffsetB - MinOffset
- : MemoryLocation::UnknownSize;
+ int64_t OverlapA =
+ KnownWidthA ? WidthA + OffsetA - MinOffset : MemoryLocation::UnknownSize;
+ int64_t OverlapB =
+ KnownWidthB ? WidthB + OffsetB - MinOffset : MemoryLocation::UnknownSize;
AliasResult AAResult = AA->alias(
- MemoryLocation(ValA, OverlapA,
- UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
+ MemoryLocation(ValA, OverlapA, UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
MemoryLocation(ValB, OverlapB,
UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
return (AAResult != NoAlias);
}
+bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other,
+ bool UseTBAA) const {
+ const MachineFunction *MF = getMF();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
+
+ // Exclude call instructions, which may alter memory but cannot be handled
+ // by this function.
+ if (isCall() || Other.isCall())
+ return true;
+
+ // If neither instruction stores to memory, they can't alias in any
+ // meaningful way, even if they read from the same address.
+ if (!mayStore() && !Other.mayStore())
+ return false;
+
+ // Both instructions must be memory operations to be able to alias.
+ if (!mayLoadOrStore() || !Other.mayLoadOrStore())
+ return false;
+
+ // Let the target decide if memory accesses cannot possibly overlap.
+ if (TII->areMemAccessesTriviallyDisjoint(*this, Other))
+ return false;
+
+ // Memory operations without memory operands may access anything. Be
+ // conservative and assume `MayAlias`.
+ if (memoperands_empty() || Other.memoperands_empty())
+ return true;
+
+ // Skip if there are too many memory operands.
+ auto NumChecks = getNumMemOperands() * Other.getNumMemOperands();
+ if (NumChecks > TII->getMemOperandAACheckLimit())
+ return true;
+
+ // Check each pair of memory operands from both instructions; the two
+ // instructions don't alias only if no pair of operands aliases.
+ for (auto *MMOa : memoperands())
+ for (auto *MMOb : Other.memoperands())
+ if (MemOperandsHaveAlias(MFI, AA, UseTBAA, MMOa, MMOb))
+ return true;
+
+ return false;
+}
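The refactored mayAlias above thus reduces to an all-pairs check over the two instructions' memory operands, bailing out conservatively when operands are missing or the pair count exceeds the target's limit. A standalone sketch of that shape (plain C++; fixed widths and a shared base object are simplifying assumptions, and the real code also consults AA and frame info):

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

struct MemOp { int64_t Offset, Width; }; // accesses of one base object

// Two fixed-width accesses of the same object overlap iff each one starts
// before the other ends.
bool pairMayAlias(const MemOp &A, const MemOp &B) {
  return A.Offset < B.Offset + B.Width && B.Offset < A.Offset + A.Width;
}

// Mirror of the loop in MachineInstr::mayAlias: give up (assume aliasing)
// if either list is empty or there are too many pairs; otherwise the
// instructions may alias only if some pair overlaps.
bool mayAlias(const std::vector<MemOp> &IA, const std::vector<MemOp> &IB,
              std::size_t CheckLimit) {
  if (IA.empty() || IB.empty() || IA.size() * IB.size() > CheckLimit)
    return true;
  for (const MemOp &A : IA)
    for (const MemOp &B : IB)
      if (pairMayAlias(A, B))
        return true;
  return false;
}

int main() {
  std::vector<MemOp> Load = {{0, 4}}, Store = {{4, 4}};
  std::printf("%d\n", mayAlias(Load, Store, 16)); // 0: [0,4) vs [4,8)
}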
+
/// hasOrderedMemoryRef - Return true if this instruction may have an ordered
/// or volatile memory reference, or if the information describing the memory
/// reference is not available. Return false if it is known to have no ordered
@@ -1447,6 +1491,8 @@ void MachineInstr::copyImplicitOps(MachineFunction &MF,
bool MachineInstr::hasComplexRegisterTies() const {
const MCInstrDesc &MCID = getDesc();
+ if (MCID.Opcode == TargetOpcode::STATEPOINT)
+ return true;
for (unsigned I = 0, E = getNumOperands(); I < E; ++I) {
const auto &Operand = getOperand(I);
if (!Operand.isReg() || Operand.isDef())
@@ -1753,6 +1799,12 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
HeapAllocMarker->printAsOperand(OS, MST);
}
+ if (DebugInstrNum) {
+ if (!FirstOp)
+ OS << ",";
+ OS << " debug-instr-number " << DebugInstrNum;
+ }
+
if (!SkipDebugLoc) {
if (const DebugLoc &DL = getDebugLoc()) {
if (!FirstOp)
@@ -2227,3 +2279,9 @@ MachineInstr::getFoldedRestoreSize(const TargetInstrInfo *TII) const {
return getSpillSlotSize(Accesses, getMF()->getFrameInfo());
return None;
}
+
+unsigned MachineInstr::getDebugInstrNum() {
+ if (DebugInstrNum == 0)
+ DebugInstrNum = getParent()->getParent()->getNewDebugInstrNum();
+ return DebugInstrNum;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp
index 5e8a916b3b3b..c06bc39b4940 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp
@@ -42,6 +42,7 @@
#include "llvm/IR/DebugLoc.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegister.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
@@ -90,7 +91,7 @@ static cl::opt<UseBFI>
DisableHoistingToHotterBlocks("disable-hoisting-to-hotter-blocks",
cl::desc("Disable hoisting instructions to"
" hotter blocks"),
- cl::init(UseBFI::None), cl::Hidden,
+ cl::init(UseBFI::PGO), cl::Hidden,
cl::values(clEnumValN(UseBFI::None, "none",
"disable the feature"),
clEnumValN(UseBFI::PGO, "pgo",
@@ -145,7 +146,7 @@ namespace {
}
// Track 'estimated' register pressure.
- SmallSet<unsigned, 32> RegSeen;
+ SmallSet<Register, 32> RegSeen;
SmallVector<unsigned, 8> RegPressure;
// Register pressure "limit" per register pressure set. If the pressure
@@ -156,7 +157,7 @@ namespace {
SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;
// For each opcode, keep a list of potential CSE instructions.
- DenseMap<unsigned, std::vector<const MachineInstr *>> CSEMap;
+ DenseMap<unsigned, std::vector<MachineInstr *>> CSEMap;
enum {
SpeculateFalse = 0,
@@ -212,7 +213,7 @@ namespace {
BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs,
SmallVectorImpl<CandidateInfo> &Candidates);
- void AddToLiveIns(unsigned Reg);
+ void AddToLiveIns(MCRegister Reg);
bool IsLICMCandidate(MachineInstr &I);
@@ -221,7 +222,7 @@ namespace {
bool HasLoopPHIUse(const MachineInstr *MI) const;
bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
- unsigned Reg) const;
+ Register Reg) const;
bool IsCheapInstruction(MachineInstr &MI) const;
@@ -245,8 +246,6 @@ namespace {
void HoistOutOfLoop(MachineDomTreeNode *HeaderN);
- void HoistRegion(MachineDomTreeNode *N, bool IsHeader);
-
void SinkIntoLoop();
void InitRegPressure(MachineBasicBlock *BB);
@@ -260,13 +259,12 @@ namespace {
MachineInstr *ExtractHoistableLoad(MachineInstr *MI);
- const MachineInstr *
- LookForDuplicate(const MachineInstr *MI,
- std::vector<const MachineInstr *> &PrevMIs);
+ MachineInstr *LookForDuplicate(const MachineInstr *MI,
+ std::vector<MachineInstr *> &PrevMIs);
- bool EliminateCSE(
- MachineInstr *MI,
- DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator &CI);
+ bool
+ EliminateCSE(MachineInstr *MI,
+ DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI);
bool MayCSE(MachineInstr *MI);
@@ -606,7 +604,7 @@ void MachineLICMBase::HoistRegionPostRA() {
/// Add register 'Reg' to the livein sets of BBs in the current loop, and make
/// sure it is not killed by any instructions in the loop.
-void MachineLICMBase::AddToLiveIns(unsigned Reg) {
+void MachineLICMBase::AddToLiveIns(MCRegister Reg) {
for (MachineBasicBlock *BB : CurLoop->getBlocks()) {
if (!BB->isLiveIn(Reg))
BB->addLiveIn(Reg);
@@ -802,8 +800,13 @@ void MachineLICMBase::SinkIntoLoop() {
I != Preheader->instr_end(); ++I) {
// We need to ensure that we can safely move this instruction into the loop.
// As such, it must not have side-effects, e.g. such as a call has.
- if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I))
+ LLVM_DEBUG(dbgs() << "LICM: Analysing sink candidate: " << *I);
+ if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I)) {
+ LLVM_DEBUG(dbgs() << "LICM: Added as sink candidate.\n");
Candidates.push_back(&*I);
+ continue;
+ }
+ LLVM_DEBUG(dbgs() << "LICM: Not added as sink candidate.\n");
}
for (MachineInstr *I : Candidates) {
@@ -813,8 +816,11 @@ void MachineLICMBase::SinkIntoLoop() {
if (!MRI->hasOneDef(MO.getReg()))
continue;
bool CanSink = true;
- MachineBasicBlock *B = nullptr;
+ MachineBasicBlock *SinkBlock = nullptr;
+ LLVM_DEBUG(dbgs() << "LICM: Try sinking: " << *I);
+
for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
+ LLVM_DEBUG(dbgs() << "LICM: Analysing use: "; MI.dump());
// FIXME: Come up with a proper cost model that estimates whether sinking
// the instruction (and thus possibly executing it on every loop
// iteration) is more expensive than a register.
@@ -823,24 +829,40 @@ void MachineLICMBase::SinkIntoLoop() {
CanSink = false;
break;
}
- if (!B) {
- B = MI.getParent();
+ if (!SinkBlock) {
+ SinkBlock = MI.getParent();
+ LLVM_DEBUG(dbgs() << "LICM: Setting sink block to: "
+ << printMBBReference(*SinkBlock) << "\n");
continue;
}
- B = DT->findNearestCommonDominator(B, MI.getParent());
- if (!B) {
+ SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent());
+ if (!SinkBlock) {
+ LLVM_DEBUG(dbgs() << "LICM: Can't find nearest dominator\n");
CanSink = false;
break;
}
+ LLVM_DEBUG(dbgs() << "LICM: Setting nearest common dom block: " <<
+ printMBBReference(*SinkBlock) << "\n");
+ }
+ if (!CanSink) {
+ LLVM_DEBUG(dbgs() << "LICM: Can't sink instruction.\n");
+ continue;
+ }
+ if (!SinkBlock) {
+ LLVM_DEBUG(dbgs() << "LICM: Not sinking, can't find sink block.\n");
+ continue;
}
- if (!CanSink || !B || B == Preheader)
+ if (SinkBlock == Preheader) {
+ LLVM_DEBUG(dbgs() << "LICM: Not sinking, sink block is the preheader\n");
continue;
+ }
- LLVM_DEBUG(dbgs() << "Sinking to " << printMBBReference(*B) << " from "
- << printMBBReference(*I->getParent()) << ": " << *I);
- B->splice(B->getFirstNonPHI(), Preheader, I);
+ LLVM_DEBUG(dbgs() << "LICM: Sinking to " << printMBBReference(*SinkBlock)
+ << " from " << printMBBReference(*I->getParent())
+ << ": " << *I);
+ SinkBlock->splice(SinkBlock->getFirstNonPHI(), Preheader, I);
- // The instruction is is moved from its basic block, so do not retain the
+ // The instruction is moved from its basic block, so do not retain the
// debug information.
assert(!I->isDebugInstr() && "Should not sink debug inst");
I->setDebugLoc(DebugLoc());
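The sink-block computation above folds every use's block into a single block via nearest common dominators. A toy model of that fold (plain C++; the dominator tree is modelled as an immediate-dominator array with depths, which is not how MachineDominatorTree is actually implemented):

#include <cstdio>
#include <utility>
#include <vector>

struct DomTree {
  std::vector<int> IDom;  // immediate dominator; IDom[root] == root
  std::vector<int> Depth; // depth in the dominator tree
  int ncd(int A, int B) const {
    while (A != B) {
      if (Depth[A] < Depth[B])
        std::swap(A, B); // make A the deeper node
      A = IDom[A];       // lift it one level
    }
    return A;
  }
};

int main() {
  // Dominator tree:  0 -> {1, 2},  1 -> {3, 4}
  DomTree DT{{0, 0, 0, 1, 1}, {0, 1, 1, 2, 2}};
  std::vector<int> UseBBs = {3, 4, 1}; // blocks containing uses
  int Sink = -1;
  for (int BB : UseBBs)
    Sink = (Sink < 0) ? BB : DT.ncd(Sink, BB);
  std::printf("sink block: %d\n", Sink); // 1: dominates all the uses
}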
@@ -978,7 +1000,7 @@ static bool isInvariantStore(const MachineInstr &MI,
Reg = TRI->lookThruCopyLike(MO.getReg(), MRI);
if (Register::isVirtualRegister(Reg))
return false;
- if (!TRI->isCallerPreservedPhysReg(Reg, *MI.getMF()))
+ if (!TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *MI.getMF()))
return false;
else
FoundCallerPresReg = true;
@@ -1008,7 +1030,7 @@ static bool isCopyFeedingInvariantStore(const MachineInstr &MI,
if (Register::isVirtualRegister(CopySrcReg))
return false;
- if (!TRI->isCallerPreservedPhysReg(CopySrcReg, *MF))
+ if (!TRI->isCallerPreservedPhysReg(CopySrcReg.asMCReg(), *MF))
return false;
Register CopyDstReg = MI.getOperand(0).getReg();
@@ -1030,6 +1052,7 @@ bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) {
bool DontMoveAcrossStore = true;
if ((!I.isSafeToMove(AA, DontMoveAcrossStore)) &&
!(HoistConstStores && isInvariantStore(I, TRI, MRI))) {
+ LLVM_DEBUG(dbgs() << "LICM: Instruction not safe to move.\n");
return false;
}
@@ -1040,65 +1063,28 @@ bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) {
// indexed load from a jump table.
// Stores and side effects are already checked by isSafeToMove.
if (I.mayLoad() && !mayLoadFromGOTOrConstantPool(I) &&
- !IsGuaranteedToExecute(I.getParent()))
+ !IsGuaranteedToExecute(I.getParent())) {
+ LLVM_DEBUG(dbgs() << "LICM: Load not guaranteed to execute.\n");
+ return false;
+ }
+
+ // The convergent attribute is used on operations that involve inter-thread
+ // communication whose results are implicitly affected by the enclosing
+ // control flow. It is not safe to hoist or sink such operations across
+ // control flow.
+ if (I.isConvergent())
return false;
return true;
}
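For example, GPU cross-lane operations such as shuffles and ballots are typically marked convergent: hoisting or sinking one across a divergent branch changes the set of threads that execute it together, and therefore its result.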
/// Returns true if the instruction is loop invariant.
-/// I.e., all virtual register operands are defined outside of the loop,
-/// physical registers aren't accessed explicitly, and there are no side
-/// effects that aren't captured by the operands or other flags.
bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I) {
- if (!IsLICMCandidate(I))
+ if (!IsLICMCandidate(I)) {
+ LLVM_DEBUG(dbgs() << "LICM: Instruction not a LICM candidate\n");
return false;
-
- // The instruction is loop invariant if all of its operands are.
- for (const MachineOperand &MO : I.operands()) {
- if (!MO.isReg())
- continue;
-
- Register Reg = MO.getReg();
- if (Reg == 0) continue;
-
- // Don't hoist an instruction that uses or defines a physical register.
- if (Register::isPhysicalRegister(Reg)) {
- if (MO.isUse()) {
- // If the physreg has no defs anywhere, it's just an ambient register
- // and we can freely move its uses. Alternatively, if it's allocatable,
- // it could get allocated to something with a def during allocation.
- // However, if the physreg is known to always be caller saved/restored
- // then this use is safe to hoist.
- if (!MRI->isConstantPhysReg(Reg) &&
- !(TRI->isCallerPreservedPhysReg(Reg, *I.getMF())))
- return false;
- // Otherwise it's safe to move.
- continue;
- } else if (!MO.isDead()) {
- // A def that isn't dead. We can't move it.
- return false;
- } else if (CurLoop->getHeader()->isLiveIn(Reg)) {
- // If the reg is live into the loop, we can't hoist an instruction
- // which would clobber it.
- return false;
- }
- }
-
- if (!MO.isUse())
- continue;
-
- assert(MRI->getVRegDef(Reg) &&
- "Machine instr not mapped for this vreg?!");
-
- // If the loop contains the definition of an operand, then the instruction
- // isn't loop invariant.
- if (CurLoop->contains(MRI->getVRegDef(Reg)))
- return false;
}
-
- // If we got this far, the instruction is loop invariant!
- return true;
+ return CurLoop->isLoopInvariant(I);
}
/// Return true if the specified instruction is used by a phi node and hoisting
@@ -1138,9 +1124,8 @@ bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const {
/// Compute operand latency between a def of 'Reg' and a use in the current
/// loop, return true if the target considered it high.
-bool MachineLICMBase::HasHighOperandLatency(MachineInstr &MI,
- unsigned DefIdx,
- unsigned Reg) const {
+bool MachineLICMBase::HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
+ Register Reg) const {
if (MRI->use_nodbg_empty(Reg))
return false;
@@ -1400,10 +1385,10 @@ void MachineLICMBase::InitCSEMap(MachineBasicBlock *BB) {
/// Find an instruction among PrevMIs that is a duplicate of MI.
/// Return this instruction if it's found.
-const MachineInstr*
+MachineInstr *
MachineLICMBase::LookForDuplicate(const MachineInstr *MI,
- std::vector<const MachineInstr*> &PrevMIs) {
- for (const MachineInstr *PrevMI : PrevMIs)
+ std::vector<MachineInstr *> &PrevMIs) {
+ for (MachineInstr *PrevMI : PrevMIs)
if (TII->produceSameValue(*MI, *PrevMI, (PreRegAlloc ? MRI : nullptr)))
return PrevMI;
@@ -1414,14 +1399,15 @@ MachineLICMBase::LookForDuplicate(const MachineInstr *MI,
/// computes the same value. If it's found, do a RAUW with the definition of
/// the existing instruction rather than hoisting the instruction to the
/// preheader.
-bool MachineLICMBase::EliminateCSE(MachineInstr *MI,
- DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator &CI) {
+bool MachineLICMBase::EliminateCSE(
+ MachineInstr *MI,
+ DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI) {
// Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
// the undef property onto uses.
if (CI == CSEMap.end() || MI->isImplicitDef())
return false;
- if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
+ if (MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
LLVM_DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
// Replace virtual registers defined by MI by their counterparts defined
@@ -1461,6 +1447,9 @@ bool MachineLICMBase::EliminateCSE(MachineInstr *MI,
Register DupReg = Dup->getOperand(Idx).getReg();
MRI->replaceRegWith(Reg, DupReg);
MRI->clearKillFlags(DupReg);
+ // Clear Dup dead flag if any, we reuse it for Reg.
+ if (!MRI->use_nodbg_empty(DupReg))
+ Dup->getOperand(Idx).setIsDead(false);
}
MI->eraseFromParent();
@@ -1474,8 +1463,8 @@ bool MachineLICMBase::EliminateCSE(MachineInstr *MI,
/// the loop.
bool MachineLICMBase::MayCSE(MachineInstr *MI) {
unsigned Opcode = MI->getOpcode();
- DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator
- CI = CSEMap.find(Opcode);
+ DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
+ CSEMap.find(Opcode);
// Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
// the undef property onto uses.
if (CI == CSEMap.end() || MI->isImplicitDef())
@@ -1529,8 +1518,8 @@ bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
// Look for opportunity to CSE the hoisted instruction.
unsigned Opcode = MI->getOpcode();
- DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator
- CI = CSEMap.find(Opcode);
+ DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
+ CSEMap.find(Opcode);
if (!EliminateCSE(MI, CI)) {
// Otherwise, splice the instruction to the preheader.
Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp
index 0c1439da9b29..78480d0e1488 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -16,11 +16,14 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/Analysis/LoopInfoImpl.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
// Explicitly instantiate methods in LoopInfoImpl.h for MI-level Loops.
@@ -146,6 +149,59 @@ MachineLoopInfo::findLoopPreheader(MachineLoop *L,
return Preheader;
}
+bool MachineLoop::isLoopInvariant(MachineInstr &I) const {
+ MachineFunction *MF = I.getParent()->getParent();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+
+ // The instruction is loop invariant if all of its operands are.
+ for (const MachineOperand &MO : I.operands()) {
+ if (!MO.isReg())
+ continue;
+
+ Register Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ // An instruction that uses or defines a physical register can't e.g. be
+ // hoisted, so mark this as not invariant.
+ if (Register::isPhysicalRegister(Reg)) {
+ if (MO.isUse()) {
+ // If the physreg has no defs anywhere, it's just an ambient register
+ // and we can freely move its uses. Alternatively, if it's allocatable,
+ // it could get allocated to something with a def during allocation.
+ // However, if the physreg is known to always be caller saved/restored
+ // then this use is safe to hoist.
+ if (!MRI->isConstantPhysReg(Reg) &&
+ !(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *I.getMF())))
+ return false;
+ // Otherwise it's safe to move.
+ continue;
+ } else if (!MO.isDead()) {
+ // A def that isn't dead can't be moved.
+ return false;
+ } else if (getHeader()->isLiveIn(Reg)) {
+ // If the reg is live into the loop, we can't hoist an instruction
+ // which would clobber it.
+ return false;
+ }
+ }
+
+ if (!MO.isUse())
+ continue;
+
+ assert(MRI->getVRegDef(Reg) &&
+ "Machine instr not mapped for this vreg?!");
+
+ // If the loop contains the definition of an operand, then the instruction
+ // isn't loop invariant.
+ if (contains(MRI->getVRegDef(Reg)))
+ return false;
+ }
+
+ // If we got this far, the instruction is loop invariant!
+ return true;
+}
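A toy reduction of the operand walk above (plain C++; it keeps only the virtual-register rule, namely that an instruction is invariant if nothing it reads is defined inside the loop, and omits all the physical-register cases):

#include <cstdio>
#include <set>

struct Inst { std::set<int> Reads; };

// Invariance reduced to the vreg rule: nothing the instruction reads may
// be defined inside the loop.
bool isLoopInvariant(const Inst &I, const std::set<int> &DefsInLoop) {
  for (int R : I.Reads)
    if (DefsInLoop.count(R))
      return false;
  return true;
}

int main() {
  std::set<int> DefsInLoop = {7};
  std::printf("%d\n", isLoopInvariant({{3, 5}}, DefsInLoop)); // 1
  std::printf("%d\n", isLoopInvariant({{3, 7}}, DefsInLoop)); // 0
}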
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MachineLoop::dump() const {
print(dbgs());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp
index 2295e1ca6d4e..fdcc8472f1c2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp
@@ -130,14 +130,3 @@ MachineBasicBlock *llvm::PeelSingleBlockLoop(LoopPeelDirection Direction,
return NewBB;
}
-
-bool llvm::isRegLiveInExitBlocks(MachineLoop *Loop, int PhysReg) {
- SmallVector<MachineBasicBlock *, 4> ExitBlocks;
- Loop->getExitBlocks(ExitBlocks);
-
- for (auto *MBB : ExitBlocks)
- if (MBB->isLiveIn(PhysReg))
- return true;
-
- return false;
-}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp
index f866c7ca53c6..5565b9cededa 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -104,7 +104,8 @@ ArrayRef<MCSymbol *> MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
BBCallbacks.back().setMap(this);
Entry.Index = BBCallbacks.size() - 1;
Entry.Fn = BB->getParent();
- MCSymbol *Sym = Context.createTempSymbol(!BB->hasAddressTaken());
+ MCSymbol *Sym = BB->hasAddressTaken() ? Context.createNamedTempSymbol()
+ : Context.createTempSymbol();
Entry.Symbols.push_back(Sym);
return Entry.Symbols;
}
@@ -143,8 +144,7 @@ void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) {
BBCallbacks[OldEntry.Index] = nullptr; // Update the callback.
// Otherwise, we need to add the old symbols to the new block's set.
- NewEntry.Symbols.insert(NewEntry.Symbols.end(), OldEntry.Symbols.begin(),
- OldEntry.Symbols.end());
+ llvm::append_range(NewEntry.Symbols, OldEntry.Symbols);
}
void MMIAddrLabelMapCallbackPtr::deleted() {
@@ -170,6 +170,7 @@ void MachineModuleInfo::finalize() {
AddrLabelSymbols = nullptr;
Context.reset();
+ // We don't clear the ExternalContext.
delete ObjFileMMI;
ObjFileMMI = nullptr;
@@ -178,7 +179,8 @@ void MachineModuleInfo::finalize() {
MachineModuleInfo::MachineModuleInfo(MachineModuleInfo &&MMI)
: TM(std::move(MMI.TM)),
Context(MMI.TM.getMCAsmInfo(), MMI.TM.getMCRegisterInfo(),
- MMI.TM.getObjFileLowering(), nullptr, nullptr, false) {
+ MMI.TM.getObjFileLowering(), nullptr, nullptr, false),
+ MachineFunctions(std::move(MMI.MachineFunctions)) {
ObjFileMMI = MMI.ObjFileMMI;
CurCallSite = MMI.CurCallSite;
UsesMSVCFloatingPoint = MMI.UsesMSVCFloatingPoint;
@@ -186,6 +188,7 @@ MachineModuleInfo::MachineModuleInfo(MachineModuleInfo &&MMI)
HasSplitStack = MMI.HasSplitStack;
HasNosplitStack = MMI.HasNosplitStack;
AddrLabelSymbols = MMI.AddrLabelSymbols;
+ ExternalContext = MMI.ExternalContext;
TheModule = MMI.TheModule;
}
@@ -195,6 +198,14 @@ MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM)
initialize();
}
+MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM,
+ MCContext *ExtContext)
+ : TM(*TM), Context(TM->getMCAsmInfo(), TM->getMCRegisterInfo(),
+ TM->getObjFileLowering(), nullptr, nullptr, false),
+ ExternalContext(ExtContext) {
+ initialize();
+}
+
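A hedged sketch of how the new external-context constructor might be used; `TM` is assumed to be a live LLVMTargetMachine, and the legacy pass manager is shown only for illustration (this usage is not part of the patch):

#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Target/TargetMachine.h"

// Share one MCContext across several modules' codegen, e.g. so a JIT can
// resolve symbols between them. MachineFunctions built through this MMI
// allocate their symbols in *SharedCtx rather than in an MMI-owned context.
void addSharedContextMMI(llvm::legacy::PassManager &PM,
                         llvm::LLVMTargetMachine *TM,
                         llvm::MCContext *SharedCtx) {
  PM.add(new llvm::MachineModuleInfoWrapperPass(TM, SharedCtx));
}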
MachineModuleInfo::~MachineModuleInfo() { finalize(); }
//===- Address of Block Management ----------------------------------------===//
@@ -203,7 +214,7 @@ ArrayRef<MCSymbol *>
MachineModuleInfo::getAddrLabelSymbolToEmit(const BasicBlock *BB) {
// Lazily create AddrLabelSymbols.
if (!AddrLabelSymbols)
- AddrLabelSymbols = new MMIAddrLabelMap(Context);
+ AddrLabelSymbols = new MMIAddrLabelMap(getContext());
return AddrLabelSymbols->getAddrLabelSymbolToEmit(const_cast<BasicBlock*>(BB));
}
@@ -295,6 +306,12 @@ MachineModuleInfoWrapperPass::MachineModuleInfoWrapperPass(
initializeMachineModuleInfoWrapperPassPass(*PassRegistry::getPassRegistry());
}
+MachineModuleInfoWrapperPass::MachineModuleInfoWrapperPass(
+ const LLVMTargetMachine *TM, MCContext *ExtContext)
+ : ImmutablePass(ID), MMI(TM, ExtContext) {
+ initializeMachineModuleInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
// Handle the Pass registration stuff necessary to use DataLayout's.
INITIALIZE_PASS(MachineModuleInfoWrapperPass, "machinemoduleinfo",
"Machine Module Information", false, false)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
index 2b4fd654e46c..9b09f5273298 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
@@ -85,7 +85,7 @@ void MachineOperand::substVirtReg(Register Reg, unsigned SubIdx,
}
void MachineOperand::substPhysReg(MCRegister Reg, const TargetRegisterInfo &TRI) {
- assert(Reg.isPhysical());
+ assert(Register::isPhysicalRegister(Reg));
if (getSubReg()) {
Reg = TRI.getSubReg(Reg, getSubReg());
// Note that getSubReg() may return 0 if the sub-register doesn't exist.
@@ -153,22 +153,25 @@ void MachineOperand::removeRegFromUses() {
/// ChangeToImmediate - Replace this operand with a new immediate operand of
/// the specified value. If an operand is known to be an immediate already,
/// the setImm method should be used.
-void MachineOperand::ChangeToImmediate(int64_t ImmVal) {
+void MachineOperand::ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags) {
assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm");
removeRegFromUses();
OpKind = MO_Immediate;
Contents.ImmVal = ImmVal;
+ setTargetFlags(TargetFlags);
}
-void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm) {
+void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm,
+ unsigned TargetFlags) {
assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm");
removeRegFromUses();
OpKind = MO_FPImmediate;
Contents.CFP = FPImm;
+ setTargetFlags(TargetFlags);
}
void MachineOperand::ChangeToES(const char *SymName,
@@ -197,7 +200,7 @@ void MachineOperand::ChangeToGA(const GlobalValue *GV, int64_t Offset,
setTargetFlags(TargetFlags);
}
-void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym) {
+void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym, unsigned TargetFlags) {
assert((!isReg() || !isTied()) &&
"Cannot change a tied operand into an MCSymbol");
@@ -205,9 +208,10 @@ void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym) {
OpKind = MO_MCSymbol;
Contents.Sym = Sym;
+ setTargetFlags(TargetFlags);
}
-void MachineOperand::ChangeToFrameIndex(int Idx) {
+void MachineOperand::ChangeToFrameIndex(int Idx, unsigned TargetFlags) {
assert((!isReg() || !isTied()) &&
"Cannot change a tied operand into a FrameIndex");
@@ -215,6 +219,7 @@ void MachineOperand::ChangeToFrameIndex(int Idx) {
OpKind = MO_FrameIndex;
setIndex(Idx);
+ setTargetFlags(TargetFlags);
}
void MachineOperand::ChangeToTargetIndex(unsigned Idx, int64_t Offset,
@@ -415,6 +420,11 @@ static const char *getTargetIndexName(const MachineFunction &MF, int Index) {
return nullptr;
}
+const char *MachineOperand::getTargetIndexName() const {
+ const MachineFunction *MF = getMFIfAvailable(*this);
+ return MF ? ::getTargetIndexName(*MF, this->getIndex()) : nullptr;
+}
+
static const char *getTargetFlagName(const TargetInstrInfo *TII, unsigned TF) {
auto Flags = TII->getSerializableDirectMachineOperandTargetFlags();
for (const auto &I : Flags) {
@@ -823,7 +833,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << "target-index(";
const char *Name = "<unknown>";
if (const MachineFunction *MF = getMFIfAvailable(*this))
- if (const auto *TargetIndexName = getTargetIndexName(*MF, getIndex()))
+ if (const auto *TargetIndexName = ::getTargetIndexName(*MF, getIndex()))
Name = TargetIndexName;
OS << Name << ')';
printOperandOffset(OS, getOffset());
@@ -1142,7 +1152,7 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
const MIRFormatter *Formatter = TII->getMIRFormatter();
// FIXME: This is not necessarily the correct MIR serialization format for
// a custom pseudo source value, but at least it allows
- // -print-machineinstrs to work on a target with custom pseudo source
+ // MIR printing to work on a target with custom pseudo source
// values.
OS << "custom \"";
Formatter->printCustomPseudoSourceValue(OS, MST, *PVal);
@@ -1152,8 +1162,10 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
}
}
MachineOperand::printOperandOffset(OS, getOffset());
- if (getBaseAlign() != getSize())
- OS << ", align " << getBaseAlign().value();
+ if (getAlign() != getSize())
+ OS << ", align " << getAlign().value();
+ if (getAlign() != getBaseAlign())
+ OS << ", basealign " << getBaseAlign().value();
auto AAInfo = getAAInfo();
if (AAInfo.TBAA) {
OS << ", !tbaa ";
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
index f9d099e02995..02998d41d831 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -59,10 +59,8 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -309,10 +307,8 @@ struct InstructionMapper {
// repeated substring.
mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
InstrListForMBB);
- InstrList.insert(InstrList.end(), InstrListForMBB.begin(),
- InstrListForMBB.end());
- UnsignedVec.insert(UnsignedVec.end(), UnsignedVecForMBB.begin(),
- UnsignedVecForMBB.end());
+ llvm::append_range(InstrList, InstrListForMBB);
+ llvm::append_range(UnsignedVec, UnsignedVecForMBB);
}
}
@@ -549,11 +545,10 @@ void MachineOutliner::findCandidates(
// That is, one must either
// * End before the other starts
// * Start after the other ends
- if (std::all_of(
- CandidatesForRepeatedSeq.begin(), CandidatesForRepeatedSeq.end(),
- [&StartIdx, &EndIdx](const Candidate &C) {
- return (EndIdx < C.getStartIdx() || StartIdx > C.getEndIdx());
- })) {
+ if (llvm::all_of(CandidatesForRepeatedSeq, [&StartIdx,
+ &EndIdx](const Candidate &C) {
+ return (EndIdx < C.getStartIdx() || StartIdx > C.getEndIdx());
+ })) {
// It doesn't overlap with anything, so we can outline it.
// Each sequence is over [StartIt, EndIt].
// Save the candidate and its location.
@@ -656,6 +651,8 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
OriginalMF->getFrameInstructions();
for (auto I = FirstCand.front(), E = std::next(FirstCand.back()); I != E;
++I) {
+ if (I->isDebugInstr())
+ continue;
MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
if (I->isCFIInstruction()) {
unsigned CFIIndex = NewMI->getOperand(0).getCFIIndex();
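Skipping debug instructions here is deliberate: the outlined body is shared by every call site, so a DBG_VALUE describing a variable location that held at one candidate would be wrong for the others.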
@@ -691,7 +688,7 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
// The live-in set for the outlined function is the union of the live-ins
// from all the outlining points.
- for (MCPhysReg Reg : make_range(CandLiveIns.begin(), CandLiveIns.end()))
+ for (MCPhysReg Reg : CandLiveIns)
LiveIns.addReg(Reg);
}
addLiveIns(MBB, LiveIns);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp
new file mode 100644
index 000000000000..e81575c88935
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePassManager.cpp
@@ -0,0 +1,121 @@
+//===---------- MachinePassManager.cpp ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the pass management machinery for machine functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachinePassManager.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/PassManagerImpl.h"
+
+using namespace llvm;
+
+namespace llvm {
+template class AllAnalysesOn<MachineFunction>;
+template class AnalysisManager<MachineFunction>;
+template class PassManager<MachineFunction>;
+
+Error MachineFunctionPassManager::run(Module &M,
+ MachineFunctionAnalysisManager &MFAM) {
+ // MachineModuleAnalysis is a module analysis pass that is never invalidated
+ // because we don't run any module pass in the codegen pipeline. This is very
+ // important because the codegen state is stored in MMI, which is the analysis
+ // result of MachineModuleAnalysis. MMI should not be recomputed.
+ auto &MMI = MFAM.getResult<MachineModuleAnalysis>(M);
+
+ (void)RequireCodeGenSCCOrder;
+ assert(!RequireCodeGenSCCOrder && "not implemented");
+
+ // Add a pass instrumentation callback (PIC) to verify machine functions.
+ if (VerifyMachineFunction) {
+ PassInstrumentation PI = MFAM.getResult<PassInstrumentationAnalysis>(M);
+
+ // No need to pop this callback later since the MIR pipeline is flat, which
+ // means the current pipeline is the top-level one. Callbacks are not used
+ // after the current pipeline finishes.
+ PI.pushBeforeNonSkippedPassCallback([&MFAM](StringRef PassID, Any IR) {
+ assert(any_isa<const MachineFunction *>(IR));
+ const MachineFunction *MF = any_cast<const MachineFunction *>(IR);
+ assert(MF && "Machine function should be valid for printing");
+ std::string Banner = std::string("After ") + std::string(PassID);
+ verifyMachineFunction(&MFAM, Banner, *MF);
+ });
+ }
+
+ if (DebugLogging) {
+ dbgs() << "Starting " << getTypeName<MachineFunction>()
+ << " pass manager run.\n";
+ }
+
+ for (auto &F : InitializationFuncs) {
+ if (auto Err = F(M, MFAM))
+ return Err;
+ }
+
+ unsigned Idx = 0;
+ size_t Size = Passes.size();
+ do {
+ // Run machine module passes
+ for (; MachineModulePasses.count(Idx) && Idx != Size; ++Idx) {
+ if (DebugLogging)
+ dbgs() << "Running pass: " << Passes[Idx]->name() << " on "
+ << M.getName() << '\n';
+ if (auto Err = MachineModulePasses.at(Idx)(M, MFAM))
+ return Err;
+ }
+
+ // Finish running all passes.
+ if (Idx == Size)
+ break;
+
+ // Run machine function passes
+
+ // Get index range of machine function passes.
+ unsigned Begin = Idx;
+ for (; !MachineModulePasses.count(Idx) && Idx != Size; ++Idx)
+ ;
+
+ for (Function &F : M) {
+ // Do not codegen any 'available_externally' functions at all, they have
+ // definitions outside the translation unit.
+ if (F.hasAvailableExternallyLinkage())
+ continue;
+
+ MachineFunction &MF = MMI.getOrCreateMachineFunction(F);
+ PassInstrumentation PI = MFAM.getResult<PassInstrumentationAnalysis>(MF);
+
+ for (unsigned I = Begin, E = Idx; I != E; ++I) {
+ auto *P = Passes[I].get();
+
+ if (!PI.runBeforePass<MachineFunction>(*P, MF))
+ continue;
+
+ // TODO: EmitSizeRemarks
+ PreservedAnalyses PassPA = P->run(MF, MFAM);
+ PI.runAfterPass(*P, MF, PassPA);
+ MFAM.invalidate(MF, PassPA);
+ }
+ }
+ } while (true);
+
+ for (auto &F : FinalizationFuncs) {
+ if (auto Err = F(M, MFAM))
+ return Err;
+ }
+
+ if (DebugLogging) {
+ dbgs() << "Finished " << getTypeName<MachineFunction>()
+ << " pass manager run.\n";
+ }
+
+ return Error::success();
+}
+
+} // namespace llvm
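The module/function interleaving in run() can be hard to see from the diff alone. A standalone model of just that control flow (plain C++; the pass names and printf calls stand in for real passes and instrumentation):

#include <cstddef>
#include <cstdio>
#include <set>
#include <string>
#include <vector>

int main() {
  // One flat pass vector; an index set marks the module-level entries.
  std::vector<std::string> Passes = {"mod-A", "fn-1", "fn-2", "mod-B", "fn-3"};
  std::set<std::size_t> ModulePasses = {0, 3};
  std::vector<std::string> Funcs = {"f", "g"};

  std::size_t Idx = 0, Size = Passes.size();
  do {
    // Run the current run of module passes.
    for (; ModulePasses.count(Idx) && Idx != Size; ++Idx)
      std::printf("run %s on module\n", Passes[Idx].c_str());
    if (Idx == Size)
      break;
    // Find the extent of the following run of function passes...
    std::size_t Begin = Idx;
    for (; !ModulePasses.count(Idx) && Idx != Size; ++Idx)
      ;
    // ...and apply all of them to each function before moving on.
    for (const std::string &F : Funcs)
      for (std::size_t I = Begin; I != Idx; ++I)
        std::printf("run %s on %s\n", Passes[I].c_str(), F.c_str());
  } while (true);
}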
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
index ef4b02ca9e3e..d0fe29f65ede 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -268,6 +268,7 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
// Reset the pragma for the next loop in iteration.
disabledByPragma = false;
+ II_setByPragma = 0;
MachineBasicBlock *LBLK = L.getTopBlock();
@@ -441,6 +442,16 @@ bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
return SMS.hasNewSchedule();
}
+void MachinePipeliner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<LiveIntervals>();
+ AU.addRequired<MachineOptimizationRemarkEmitterPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {
if (II_setByPragma > 0)
MII = II_setByPragma;
@@ -705,14 +716,13 @@ static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA) {
/// This function calls the code in ValueTracking, but first checks that the
/// instruction has a memory operand.
static void getUnderlyingObjects(const MachineInstr *MI,
- SmallVectorImpl<const Value *> &Objs,
- const DataLayout &DL) {
+ SmallVectorImpl<const Value *> &Objs) {
if (!MI->hasOneMemOperand())
return;
MachineMemOperand *MM = *MI->memoperands_begin();
if (!MM->getValue())
return;
- GetUnderlyingObjects(MM->getValue(), Objs, DL);
+ getUnderlyingObjects(MM->getValue(), Objs);
for (const Value *V : Objs) {
if (!isIdentifiedObject(V)) {
Objs.clear();
@@ -736,7 +746,7 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
PendingLoads.clear();
else if (MI.mayLoad()) {
SmallVector<const Value *, 4> Objs;
- getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
+ ::getUnderlyingObjects(&MI, Objs);
if (Objs.empty())
Objs.push_back(UnknownValue);
for (auto V : Objs) {
@@ -745,7 +755,7 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
}
} else if (MI.mayStore()) {
SmallVector<const Value *, 4> Objs;
- getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
+ ::getUnderlyingObjects(&MI, Objs);
if (Objs.empty())
Objs.push_back(UnknownValue);
for (auto V : Objs) {
@@ -803,10 +813,8 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
continue;
}
AliasResult AAResult = AA->alias(
- MemoryLocation(MMO1->getValue(), LocationSize::unknown(),
- MMO1->getAAInfo()),
- MemoryLocation(MMO2->getValue(), LocationSize::unknown(),
- MMO2->getAAInfo()));
+ MemoryLocation::getAfter(MMO1->getValue(), MMO1->getAAInfo()),
+ MemoryLocation::getAfter(MMO2->getValue(), MMO2->getAAInfo()));
if (AAResult != NoAlias) {
SDep Dep(Load, SDep::Barrier);
@@ -1587,12 +1595,12 @@ static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
SmallPtrSet<SUnit *, 8> &Visited) {
if (Cur->isBoundaryNode())
return false;
- if (Exclude.count(Cur) != 0)
+ if (Exclude.contains(Cur))
return false;
- if (DestNodes.count(Cur) != 0)
+ if (DestNodes.contains(Cur))
return true;
if (!Visited.insert(Cur).second)
- return Path.count(Cur) != 0;
+ return Path.contains(Cur);
bool FoundPath = false;
for (auto &SI : Cur->Succs)
FoundPath |= computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
@@ -1632,7 +1640,8 @@ static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker,
if (Register::isVirtualRegister(Reg))
Uses.insert(Reg);
else if (MRI.isAllocatable(Reg))
- for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
+ ++Units)
Uses.insert(*Units);
}
}
@@ -1645,7 +1654,8 @@ static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker,
LiveOutRegs.push_back(RegisterMaskPair(Reg,
LaneBitmask::getNone()));
} else if (MRI.isAllocatable(Reg)) {
- for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
+ ++Units)
if (!Uses.count(*Units))
LiveOutRegs.push_back(RegisterMaskPair(*Units,
LaneBitmask::getNone()));
@@ -1741,7 +1751,6 @@ void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
}
NodeSets.clear();
LLVM_DEBUG(dbgs() << "Clear recurrence node-sets\n");
- return;
}
/// Add the nodes that do not belong to a recurrence set into groups
@@ -1946,7 +1955,7 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
for (const auto &I : maxHeight->Succs) {
if (Nodes.count(I.getSUnit()) == 0)
continue;
- if (NodeOrder.count(I.getSUnit()) != 0)
+ if (NodeOrder.contains(I.getSUnit()))
continue;
if (ignoreDependence(I, false))
continue;
@@ -1958,7 +1967,7 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
continue;
if (Nodes.count(I.getSUnit()) == 0)
continue;
- if (NodeOrder.count(I.getSUnit()) != 0)
+ if (NodeOrder.contains(I.getSUnit()))
continue;
R.insert(I.getSUnit());
}
@@ -1997,7 +2006,7 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
for (const auto &I : maxDepth->Preds) {
if (Nodes.count(I.getSUnit()) == 0)
continue;
- if (NodeOrder.count(I.getSUnit()) != 0)
+ if (NodeOrder.contains(I.getSUnit()))
continue;
R.insert(I.getSUnit());
}
@@ -2007,7 +2016,7 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
continue;
if (Nodes.count(I.getSUnit()) == 0)
continue;
- if (NodeOrder.count(I.getSUnit()) != 0)
+ if (NodeOrder.contains(I.getSUnit()))
continue;
R.insert(I.getSUnit());
}
@@ -2270,7 +2279,7 @@ void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
/// Return the instruction in the loop that defines the register.
/// If the definition is a Phi, then follow the Phi operand to
/// the instruction in the loop.
-MachineInstr *SwingSchedulerDAG::findDefInLoop(unsigned Reg) {
+MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) {
SmallPtrSet<MachineInstr *, 8> Visited;
MachineInstr *Def = MRI.getVRegDef(Reg);
while (Def->isPHI()) {
@@ -2943,7 +2952,7 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
}
// Replace the old order with the new order.
cycleInstrs.swap(newOrderPhi);
- cycleInstrs.insert(cycleInstrs.end(), newOrderI.begin(), newOrderI.end());
+ llvm::append_range(cycleInstrs, newOrderI);
SSD->fixupRegisterOverlaps(cycleInstrs);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index 4c733738840a..5325eda9d478 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -417,17 +417,11 @@ MachineInstr *MachineRegisterInfo::getUniqueVRegDef(Register Reg) const {
}
bool MachineRegisterInfo::hasOneNonDBGUse(Register RegNo) const {
- use_nodbg_iterator UI = use_nodbg_begin(RegNo);
- if (UI == use_nodbg_end())
- return false;
- return ++UI == use_nodbg_end();
+ return hasSingleElement(use_nodbg_operands(RegNo));
}
bool MachineRegisterInfo::hasOneNonDBGUser(Register RegNo) const {
- use_instr_nodbg_iterator UI = use_instr_nodbg_begin(RegNo);
- if (UI == use_instr_nodbg_end())
- return false;
- return ++UI == use_instr_nodbg_end();
+ return hasSingleElement(use_nodbg_instructions(RegNo));
}
/// clearKillFlags - Iterate over all the uses of the given register and
@@ -532,13 +526,6 @@ bool MachineRegisterInfo::isConstantPhysReg(MCRegister PhysReg) const {
return true;
}
-bool
-MachineRegisterInfo::isCallerPreservedOrConstPhysReg(MCRegister PhysReg) const {
- const TargetRegisterInfo *TRI = getTargetRegisterInfo();
- return isConstantPhysReg(PhysReg) ||
- TRI->isCallerPreservedPhysReg(PhysReg, *MF);
-}
-
/// markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the
/// specified register as undefined which causes the DBG_VALUE to be
/// deleted during LiveDebugVariables analysis.
@@ -630,8 +617,7 @@ void MachineRegisterInfo::disableCalleeSavedRegister(MCRegister Reg) {
// Remove the register (and its aliases from the list).
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- UpdatedCSRs.erase(std::remove(UpdatedCSRs.begin(), UpdatedCSRs.end(), *AI),
- UpdatedCSRs.end());
+ llvm::erase_value(UpdatedCSRs, *AI);
}
const MCPhysReg *MachineRegisterInfo::getCalleeSavedRegs() const {
@@ -645,8 +631,7 @@ void MachineRegisterInfo::setCalleeSavedRegs(ArrayRef<MCPhysReg> CSRs) {
if (IsUpdatedCSRsInitialized)
UpdatedCSRs.clear();
- for (MCPhysReg Reg : CSRs)
- UpdatedCSRs.push_back(Reg);
+ append_range(UpdatedCSRs, CSRs);
// Zero value represents the end of the register list
// (no more registers should be pushed).
@@ -660,7 +645,7 @@ bool MachineRegisterInfo::isReservedRegUnit(unsigned Unit) const {
bool IsRootReserved = true;
for (MCSuperRegIterator Super(*Root, TRI, /*IncludeSelf=*/true);
Super.isValid(); ++Super) {
- unsigned Reg = *Super;
+ MCRegister Reg = *Super;
if (!isReserved(Reg)) {
IsRootReserved = false;
break;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp
index b12557d6d326..462082df5d05 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAUpdater.cpp
@@ -50,15 +50,18 @@ MachineSSAUpdater::~MachineSSAUpdater() {
}
/// Initialize - Reset this object to get ready for a new set of SSA
-/// updates. ProtoValue is the value used to name PHI nodes.
-void MachineSSAUpdater::Initialize(Register V) {
+/// updates.
+void MachineSSAUpdater::Initialize(const TargetRegisterClass *RC) {
if (!AV)
AV = new AvailableValsTy();
else
getAvailableVals(AV).clear();
- VR = V;
- VRC = MRI->getRegClass(VR);
+ VRC = RC;
+}
+
+void MachineSSAUpdater::Initialize(Register V) {
+ Initialize(MRI->getRegClass(V));
}
/// HasValueForBlock - Return true if the MachineSSAUpdater already has a value for
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
index cf75d531deb2..8d51bb26103a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveInterval.h"
@@ -73,6 +74,8 @@ using namespace llvm;
#define DEBUG_TYPE "machine-scheduler"
+STATISTIC(NumClustered, "Number of load/store pairs clustered");
+
namespace llvm {
cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
@@ -126,6 +129,15 @@ static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
cl::desc("Enable memop clustering."),
cl::init(true));
+static cl::opt<bool>
+ ForceFastCluster("force-fast-cluster", cl::Hidden,
+ cl::desc("Switch to fast cluster algorithm with the lost "
+ "of some fusion opportunities"),
+ cl::init(false));
+static cl::opt<unsigned>
+ FastClusterThreshold("fast-cluster-threshold", cl::Hidden,
+ cl::desc("The threshold for fast cluster"),
+ cl::init(1000));
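Both knobs are hidden cl::opt flags, so they do not show up in plain -help output but can still be set, e.g. as `llc -force-fast-cluster` or `llc -fast-cluster-threshold=500`, or from a clang invocation via `-mllvm -fast-cluster-threshold=500` (illustrative values).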
// DAG subtrees must have at least this many nodes.
static const unsigned MinSubtreeSize = 8;
@@ -228,8 +240,13 @@ char PostMachineScheduler::ID = 0;
char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
-INITIALIZE_PASS(PostMachineScheduler, "postmisched",
- "PostRA Machine Instruction Scheduler", false, false)
+INITIALIZE_PASS_BEGIN(PostMachineScheduler, "postmisched",
+ "PostRA Machine Instruction Scheduler", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(PostMachineScheduler, "postmisched",
+ "PostRA Machine Instruction Scheduler", false, false)
PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
@@ -1098,7 +1115,7 @@ updateScheduledPressure(const SUnit *SU,
void ScheduleDAGMILive::updatePressureDiffs(
ArrayRef<RegisterMaskPair> LiveUses) {
for (const RegisterMaskPair &P : LiveUses) {
- unsigned Reg = P.RegUnit;
+ Register Reg = P.RegUnit;
/// FIXME: Currently assuming single-use physregs.
if (!Register::isVirtualRegister(Reg))
continue;
@@ -1298,7 +1315,7 @@ void ScheduleDAGMILive::computeDFSResult() {
/// The cyclic path estimation identifies a def-use pair that crosses the back
/// edge and considers the depth and height of the nodes. For example, consider
/// the following instruction sequence where each instruction has unit latency
-/// and defines an epomymous virtual register:
+/// and defines an eponymous virtual register:
///
/// a->b(a,c)->c(b)->d(c)->exit
///
@@ -1323,7 +1340,7 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
unsigned MaxCyclicLatency = 0;
// Visit each live out vreg def to find def/use pairs that cross iterations.
for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
- unsigned Reg = P.RegUnit;
+ Register Reg = P.RegUnit;
if (!Register::isVirtualRegister(Reg))
continue;
const LiveInterval &LI = LIS->getInterval(Reg);
@@ -1527,7 +1544,12 @@ public:
void apply(ScheduleDAGInstrs *DAGInstrs) override;
protected:
- void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG);
+ void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, bool FastCluster,
+ ScheduleDAGInstrs *DAG);
+ void collectMemOpRecords(std::vector<SUnit> &SUnits,
+ SmallVectorImpl<MemOpInfo> &MemOpRecords);
+ bool groupMemOps(ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
+ DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups);
};
class StoreClusterMutation : public BaseMemOpClusterMutation {
@@ -1563,109 +1585,179 @@ createStoreClusterDAGMutation(const TargetInstrInfo *TII,
} // end namespace llvm
+// Sort all the loads/stores first. Then, for each load/store, check the
+// following loads/stores one by one until reaching the first non-dependent
+// one, and call the target hook to see if they can be clustered.
+// If FastCluster is enabled, we assume all the loads/stores have been
+// preprocessed and have no dependencies on each other.
void BaseMemOpClusterMutation::clusterNeighboringMemOps(
- ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG) {
- SmallVector<MemOpInfo, 32> MemOpRecords;
- for (SUnit *SU : MemOps) {
- const MachineInstr &MI = *SU->getInstr();
- SmallVector<const MachineOperand *, 4> BaseOps;
- int64_t Offset;
- bool OffsetIsScalable;
- unsigned Width;
- if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
- OffsetIsScalable, Width, TRI)) {
- MemOpRecords.push_back(MemOpInfo(SU, BaseOps, Offset, Width));
-
- LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
- << Offset << ", OffsetIsScalable: " << OffsetIsScalable
- << ", Width: " << Width << "\n");
- }
-#ifndef NDEBUG
- for (auto *Op : BaseOps)
- assert(Op);
-#endif
- }
- if (MemOpRecords.size() < 2)
- return;
-
- llvm::sort(MemOpRecords);
+ ArrayRef<MemOpInfo> MemOpRecords, bool FastCluster,
+ ScheduleDAGInstrs *DAG) {
+ // Keep track of the current cluster length and bytes for each SUnit.
+ DenseMap<unsigned, std::pair<unsigned, unsigned>> SUnit2ClusterInfo;
// At this point, `MemOpRecords` array must hold at least two mem ops. Try to
// cluster mem ops collected within `MemOpRecords` array.
- unsigned ClusterLength = 1;
- unsigned CurrentClusterBytes = MemOpRecords[0].Width;
for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
// Decision to cluster mem ops is taken based on target dependent logic
auto MemOpa = MemOpRecords[Idx];
- auto MemOpb = MemOpRecords[Idx + 1];
- ++ClusterLength;
- CurrentClusterBytes += MemOpb.Width;
- if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength,
- CurrentClusterBytes)) {
- // Current mem ops pair could not be clustered, reset cluster length, and
- // go to next pair
- ClusterLength = 1;
- CurrentClusterBytes = MemOpb.Width;
+
+ // Seek the next load/store to cluster with.
+ unsigned NextIdx = Idx + 1;
+ for (; NextIdx < End; ++NextIdx)
+ // Skip if MemOpb has been clustered already or has a dependency
+ // on MemOpa.
+ if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&
+ (FastCluster ||
+ (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&
+ !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))))
+ break;
+ if (NextIdx == End)
continue;
+
+ auto MemOpb = MemOpRecords[NextIdx];
+ unsigned ClusterLength = 2;
+ unsigned CurrentClusterBytes = MemOpa.Width + MemOpb.Width;
+ if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) {
+ ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1;
+ CurrentClusterBytes =
+ SUnit2ClusterInfo[MemOpa.SU->NodeNum].second + MemOpb.Width;
}
+ if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength,
+ CurrentClusterBytes))
+ continue;
+
SUnit *SUa = MemOpa.SU;
SUnit *SUb = MemOpb.SU;
if (SUa->NodeNum > SUb->NodeNum)
std::swap(SUa, SUb);
// FIXME: Is this check really required?
- if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
- ClusterLength = 1;
- CurrentClusterBytes = MemOpb.Width;
+ if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
continue;
- }
LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
<< SUb->NodeNum << ")\n");
-
- // Copy successor edges from SUa to SUb. Interleaving computation
- // dependent on SUa can prevent load combining due to register reuse.
- // Predecessor edges do not need to be copied from SUb to SUa since
- // nearby loads should have effectively the same inputs.
- for (const SDep &Succ : SUa->Succs) {
- if (Succ.getSUnit() == SUb)
- continue;
- LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
- << ")\n");
- DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
+ ++NumClustered;
+
+ if (IsLoad) {
+ // Copy successor edges from SUa to SUb. Interleaving computation
+ // dependent on SUa can prevent load combining due to register reuse.
+ // Predecessor edges do not need to be copied from SUb to SUa since
+ // nearby loads should have effectively the same inputs.
+ for (const SDep &Succ : SUa->Succs) {
+ if (Succ.getSUnit() == SUb)
+ continue;
+ LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
+ << ")\n");
+ DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
+ }
+ } else {
+ // Copy predecessor edges from SUb to SUa to avoid the SUnits that
+ // SUb depends on being scheduled in between SUb and SUa. Successor
+ // edges do not need to be copied from SUa to SUb since nothing will
+ // depend on stores.
+ // Note that we need not care about memory dependencies here: two mem
+ // ops are never clustered if they have any memory dependency.
+ for (const SDep &Pred : SUb->Preds) {
+ if (Pred.getSUnit() == SUa)
+ continue;
+ LLVM_DEBUG(dbgs() << " Copy Pred SU(" << Pred.getSUnit()->NodeNum
+ << ")\n");
+ DAG->addEdge(SUa, SDep(Pred.getSUnit(), SDep::Artificial));
+ }
}
+ SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,
+ CurrentClusterBytes};
+
LLVM_DEBUG(dbgs() << " Curr cluster length: " << ClusterLength
<< ", Curr cluster bytes: " << CurrentClusterBytes
<< "\n");
}
}
-/// Callback from DAG postProcessing to create cluster edges for loads.
-void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
- // Map DAG NodeNum to a set of dependent MemOps in store chain.
- DenseMap<unsigned, SmallVector<SUnit *, 4>> StoreChains;
- for (SUnit &SU : DAG->SUnits) {
+void BaseMemOpClusterMutation::collectMemOpRecords(
+ std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords) {
+ for (auto &SU : SUnits) {
if ((IsLoad && !SU.getInstr()->mayLoad()) ||
(!IsLoad && !SU.getInstr()->mayStore()))
continue;
+ const MachineInstr &MI = *SU.getInstr();
+ SmallVector<const MachineOperand *, 4> BaseOps;
+ int64_t Offset;
+ bool OffsetIsScalable;
+ unsigned Width;
+ if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
+ OffsetIsScalable, Width, TRI)) {
+ MemOpRecords.push_back(MemOpInfo(&SU, BaseOps, Offset, Width));
+
+ LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
+ << Offset << ", OffsetIsScalable: " << OffsetIsScalable
+ << ", Width: " << Width << "\n");
+ }
+#ifndef NDEBUG
+ for (auto *Op : BaseOps)
+ assert(Op);
+#endif
+ }
+}
+
+bool BaseMemOpClusterMutation::groupMemOps(
+ ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
+ DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups) {
+ bool FastCluster =
+ ForceFastCluster ||
+ MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold;
+
+ for (const auto &MemOp : MemOps) {
unsigned ChainPredID = DAG->SUnits.size();
- for (const SDep &Pred : SU.Preds) {
- if (Pred.isCtrl() && !Pred.isArtificial()) {
- ChainPredID = Pred.getSUnit()->NodeNum;
- break;
+ if (FastCluster) {
+ for (const SDep &Pred : MemOp.SU->Preds) {
+ // We only want to cluster the mem ops that have the same ctrl (non-data)
+ // pred so that they don't have a ctrl dependency on each other. But for
+ // store instrs, we can still cluster them if the pred is a load instr.
+ if ((Pred.isCtrl() &&
+ (IsLoad ||
+ (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) &&
+ !Pred.isArtificial()) {
+ ChainPredID = Pred.getSUnit()->NodeNum;
+ break;
+ }
}
- }
- // Insert the SU to corresponding store chain.
- auto &Chain = StoreChains.FindAndConstruct(ChainPredID).second;
- Chain.push_back(&SU);
+ } else
+ ChainPredID = 0;
+
+ Groups[ChainPredID].push_back(MemOp);
}
+ return FastCluster;
+}
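The FastCluster switch is driven purely by a size product. A minimal sketch of just that arithmetic (threshold and sizes here are illustrative, not taken from any target):

    #include <cstddef>

    // Fast clustering kicks in when NumMemOps * NumSUnits / 1000 exceeds
    // the threshold, i.e. when grouping by control predecessor over the
    // whole DAG would be too expensive. For example, 300 mem ops in a
    // 4000-node DAG score 1200 and would trip a threshold of 1000.
    bool useFastCluster(std::size_t NumMemOps, std::size_t NumSUnits,
                        unsigned Threshold, bool Force) {
      return Force || NumMemOps * NumSUnits / 1000 > Threshold;
    }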
- // Iterate over the store chains.
- for (auto &SCD : StoreChains)
- clusterNeighboringMemOps(SCD.second, DAG);
+/// Callback from DAG postProcessing to create cluster edges for loads/stores.
+void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
+ // Collect all the clusterable loads/stores
+ SmallVector<MemOpInfo, 32> MemOpRecords;
+ collectMemOpRecords(DAG->SUnits, MemOpRecords);
+
+ if (MemOpRecords.size() < 2)
+ return;
+
+ // If the DAG is too complex, use a heuristic that puts the independent
+ // loads/stores into the same group, to avoid a compile-time blowup.
+ // Note that some fusion pairs may be lost this way.
+ DenseMap<unsigned, SmallVector<MemOpInfo, 32>> Groups;
+ bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups);
+
+ for (auto &Group : Groups) {
+ // Sort the loads/stores so that clustering can stop as early as
+ // possible.
+ llvm::sort(Group.second);
+
+ // Try to cluster all the neighboring loads/stores.
+ clusterNeighboringMemOps(Group.second, FastCluster, DAG);
+ }
}
//===----------------------------------------------------------------------===//
@@ -2724,7 +2816,11 @@ bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
GenericSchedulerBase::SchedCandidate &Cand,
SchedBoundary &Zone) {
if (Zone.isTop()) {
- if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
+ // Prefer the candidate with the lesser depth, but only if one of them has
+ // depth greater than the total latency scheduled so far; otherwise either
+ // of them could be scheduled now with no stall.
+ if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) >
+ Zone.getScheduledLatency()) {
if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
return true;
@@ -2733,7 +2829,11 @@ bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
TryCand, Cand, GenericSchedulerBase::TopPathReduce))
return true;
} else {
- if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
+ // Prefer the candidate with the lesser height, but only if one of them has
+ // height greater than the total latency scheduled so far; otherwise either
+ // of them could be scheduled now with no stall.
+ if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) >
+ Zone.getScheduledLatency()) {
if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
return true;
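Both hunks replace a one-sided test with a max() guard. A small standalone sketch of the top-boundary version, using plain integers rather than SUnits:

    #include <algorithm>
    #include <cassert>

    // Depth should only break the tie when at least one candidate would
    // extend the critical path beyond what is already scheduled.
    bool depthMatters(unsigned TryDepth, unsigned CandDepth,
                      unsigned ScheduledLatency) {
      return std::max(TryDepth, CandDepth) > ScheduledLatency;
    }

    int main() {
      assert(depthMatters(5, 3, 4));  // one candidate would stall: compare
      assert(!depthMatters(2, 3, 4)); // both fit with no stall: don't
    }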
@@ -3356,13 +3456,13 @@ ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
return DAG;
}
-static ScheduleDAGInstrs *createConveringSched(MachineSchedContext *C) {
+static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
return createGenericSchedLive(C);
}
static MachineSchedRegistry
GenericSchedRegistry("converge", "Standard converging scheduler.",
- createConveringSched);
+ createConvergingSched);
//===----------------------------------------------------------------------===//
// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
@@ -3736,7 +3836,7 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
return true;
}
- static bool isNodeHidden(const SUnit *Node) {
+ static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) {
if (ViewMISchedCutoff == 0)
return false;
return (Node->Preds.size() > ViewMISchedCutoff
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
index 5f958bbc31b7..378df1b75e25 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
@@ -34,6 +34,8 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -77,6 +79,18 @@ static cl::opt<unsigned> SplitEdgeProbabilityThreshold(
"splitted critical edge"),
cl::init(40), cl::Hidden);
+static cl::opt<unsigned> SinkLoadInstsPerBlockThreshold(
+ "machine-sink-load-instrs-threshold",
+ cl::desc("Do not try to find alias store for a load if there is a in-path "
+ "block whose instruction number is higher than this threshold."),
+ cl::init(2000), cl::Hidden);
+
+static cl::opt<unsigned> SinkLoadBlocksThreshold(
+ "machine-sink-load-blocks-threshold",
+ cl::desc("Do not try to find alias store for a load if the block number in "
+ "the straight line is higher than this threshold."),
+ cl::init(20), cl::Hidden);
+
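Both thresholds are ordinary cl::opt flags, so with a compiler build that accepts hidden options they can be tuned on the llc command line; the values below are illustrative only:

    llc -machine-sink-load-instrs-threshold=1000 \
        -machine-sink-load-blocks-threshold=10 input.ll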
STATISTIC(NumSunk, "Number of machine instructions sunk");
STATISTIC(NumSplit, "Number of critical edges split");
STATISTIC(NumCoalesces, "Number of copies coalesced");
@@ -94,6 +108,7 @@ namespace {
MachineBlockFrequencyInfo *MBFI;
const MachineBranchProbabilityInfo *MBPI;
AliasAnalysis *AA;
+ RegisterClassInfo RegClassInfo;
// Remember which edges have been considered for breaking.
SmallSet<std::pair<MachineBasicBlock*, MachineBasicBlock*>, 8>
@@ -127,6 +142,15 @@ namespace {
/// current block.
DenseSet<DebugVariable> SeenDbgVars;
+ std::map<std::pair<MachineBasicBlock *, MachineBasicBlock *>, bool>
+ HasStoreCache;
+ std::map<std::pair<MachineBasicBlock *, MachineBasicBlock *>,
+ std::vector<MachineInstr *>>
+ StoreInstrCache;
+
+ /// Cached register pressure for each BB.
+ std::map<MachineBasicBlock *, std::vector<unsigned>> CachedRegisterPressure;
+
public:
static char ID; // Pass identification
@@ -159,6 +183,9 @@ namespace {
MachineBasicBlock *From,
MachineBasicBlock *To);
+ bool hasStoreBetween(MachineBasicBlock *From, MachineBasicBlock *To,
+ MachineInstr &MI);
+
/// Postpone the splitting of the given critical
/// edge (\p From, \p To).
///
@@ -184,12 +211,12 @@ namespace {
/// to the copy source.
void SalvageUnsunkDebugUsersOfCopy(MachineInstr &,
MachineBasicBlock *TargetBlock);
- bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB,
- MachineBasicBlock *DefMBB,
- bool &BreakPHIEdge, bool &LocalUse) const;
+ bool AllUsesDominatedByBlock(Register Reg, MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB, bool &BreakPHIEdge,
+ bool &LocalUse) const;
MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
bool &BreakPHIEdge, AllSuccsCache &AllSuccessors);
- bool isProfitableToSinkTo(unsigned Reg, MachineInstr &MI,
+ bool isProfitableToSinkTo(Register Reg, MachineInstr &MI,
MachineBasicBlock *MBB,
MachineBasicBlock *SuccToSinkTo,
AllSuccsCache &AllSuccessors);
@@ -200,6 +227,8 @@ namespace {
SmallVector<MachineBasicBlock *, 4> &
GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
AllSuccsCache &AllSuccessors) const;
+
+ std::vector<unsigned> &getBBRegisterPressure(MachineBasicBlock &MBB);
};
} // end anonymous namespace
@@ -253,12 +282,11 @@ bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI,
/// occur in blocks dominated by the specified block. If any use is in the
/// definition block, then return false since it is never legal to move def
/// after uses.
-bool
-MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
- MachineBasicBlock *MBB,
- MachineBasicBlock *DefMBB,
- bool &BreakPHIEdge,
- bool &LocalUse) const {
+bool MachineSinking::AllUsesDominatedByBlock(Register Reg,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB,
+ bool &BreakPHIEdge,
+ bool &LocalUse) const {
assert(Register::isVirtualRegister(Reg) && "Only makes sense for vregs");
// Ignore debug uses because debug info doesn't affect the code.
@@ -327,6 +355,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr;
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ RegClassInfo.runOnMachineFunction(MF);
bool EverMadeChange = false;
@@ -347,11 +376,9 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
<< printMBBReference(*Pair.first) << " -- "
<< printMBBReference(*NewSucc) << " -- "
<< printMBBReference(*Pair.second) << '\n');
- if (MBFI) {
- auto NewSuccFreq = MBFI->getBlockFreq(Pair.first) *
- MBPI->getEdgeProbability(Pair.first, NewSucc);
- MBFI->setBlockFreq(NewSucc, NewSuccFreq.getFrequency());
- }
+ if (MBFI)
+ MBFI->onEdgeSplit(*Pair.first, *NewSucc, *MBPI);
+
MadeChange = true;
++NumSplit;
} else
@@ -362,6 +389,9 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
EverMadeChange = true;
}
+ HasStoreCache.clear();
+ StoreInstrCache.clear();
+
// Now clear any kill flags for recorded registers.
for (auto I : RegsToClearKillFlags)
MRI->clearKillFlags(I);
@@ -419,6 +449,8 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
SeenDbgUsers.clear();
SeenDbgVars.clear();
+ // Recalculate the BB register pressure after processing one BB.
+ CachedRegisterPressure.clear();
return MadeChange;
}
@@ -430,7 +462,7 @@ void MachineSinking::ProcessDbgInst(MachineInstr &MI) {
DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
MI.getDebugLoc()->getInlinedAt());
- bool SeenBefore = SeenDbgVars.count(Var) != 0;
+ bool SeenBefore = SeenDbgVars.contains(Var);
MachineOperand &MO = MI.getDebugOperand(0);
if (MO.isReg() && MO.getReg().isVirtual())
@@ -561,8 +593,44 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
return true;
}
+std::vector<unsigned> &
+MachineSinking::getBBRegisterPressure(MachineBasicBlock &MBB) {
+ // To save compile time, the cached register pressure of an MBB is not
+ // updated within one ProcessBlock iteration, even though sinking
+ // instructions into the MBB does change its register pressure.
+ // FIXME: need an accurate and cheap register pressure estimation model here.
+ auto RP = CachedRegisterPressure.find(&MBB);
+ if (RP != CachedRegisterPressure.end())
+ return RP->second;
+
+ RegionPressure Pressure;
+ RegPressureTracker RPTracker(Pressure);
+
+ // Initialize the register pressure tracker.
+ RPTracker.init(MBB.getParent(), &RegClassInfo, nullptr, &MBB, MBB.end(),
+ /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);
+
+ for (MachineBasicBlock::iterator MII = MBB.instr_end(),
+ MIE = MBB.instr_begin();
+ MII != MIE; --MII) {
+ MachineInstr &MI = *std::prev(MII);
+ if (MI.isDebugValue() || MI.isDebugLabel())
+ continue;
+ RegisterOperands RegOpers;
+ RegOpers.collect(MI, *TRI, *MRI, false, false);
+ RPTracker.recedeSkipDebugValues();
+ assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!");
+ RPTracker.recede(RegOpers);
+ }
+
+ RPTracker.closeRegion();
+ auto It = CachedRegisterPressure.insert(
+ std::make_pair(&MBB, RPTracker.getPressure().MaxSetPressure));
+ return It.first->second;
+}
+
/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
-bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr &MI,
+bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
MachineBasicBlock *MBB,
MachineBasicBlock *SuccToSinkTo,
AllSuccsCache &AllSuccessors) {
@@ -598,9 +666,73 @@ bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr &MI,
FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors))
return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors);
- // If SuccToSinkTo is final destination and it is a post dominator of current
- // block then it is not profitable to sink MI into SuccToSinkTo block.
- return false;
+ MachineLoop *ML = LI->getLoopFor(MBB);
+
+ // If the instruction is not inside a loop, it is not profitable to sink MI to
+ // a post-dominating block SuccToSinkTo.
+ if (!ML)
+ return false;
+
+ auto isRegisterPressureSetExceedLimit = [&](const TargetRegisterClass *RC) {
+ unsigned Weight = TRI->getRegClassWeight(RC).RegWeight;
+ const int *PS = TRI->getRegClassPressureSets(RC);
+ // Get register pressure for block SuccToSinkTo.
+ std::vector<unsigned> BBRegisterPressure =
+ getBBRegisterPressure(*SuccToSinkTo);
+ for (; *PS != -1; PS++)
+ // Check whether any register pressure set would exceed its limit in block
+ // SuccToSinkTo after sinking.
+ if (Weight + BBRegisterPressure[*PS] >=
+ TRI->getRegPressureSetLimit(*MBB->getParent(), *PS))
+ return true;
+ return false;
+ };
+
+ // If this instruction is inside a loop and sinking it can shorten the live
+ // ranges of more registers, it is still profitable.
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ // Ignore non-register operands.
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+
+ // Don't handle physical registers.
+ if (Register::isPhysicalRegister(Reg))
+ return false;
+
+ // Users for the defs are all dominated by SuccToSinkTo.
+ if (MO.isDef()) {
+ // This def register's live range is shortened after sinking.
+ bool LocalUse = false;
+ if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, BreakPHIEdge,
+ LocalUse))
+ return false;
+ } else {
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ // If DefMI is defined outside of the loop, there is no live range
+ // impact for this operand. "Defined outside of the loop" means either:
+ // 1: the definition is outside of the loop, or
+ // 2: the definition is in this loop, but it is a PHI in the loop header.
+ if (LI->getLoopFor(DefMI->getParent()) != ML ||
+ (DefMI->isPHI() && LI->isLoopHeader(DefMI->getParent())))
+ continue;
+ // DefMI is defined inside the loop.
+ // If sinking this operand would make some register pressure set exceed
+ // its limit, it is not profitable.
+ if (isRegisterPressureSetExceedLimit(MRI->getRegClass(Reg))) {
+ LLVM_DEBUG(dbgs() << "register pressure exceed limit, not profitable.");
+ return false;
+ }
+ }
+ }
+
+ // If MI is in a loop, and all of its operands are either live across the
+ // whole loop or do not push any register pressure set over its limit when
+ // sunk, it is profitable to sink MI.
+ return true;
}
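The pressure test inside the lambda reduces to: for each pressure set the candidate's register class belongs to, would the class weight plus the block's current pressure reach that set's limit? A standalone sketch with plain vectors (all values assumed; the real list of sets is -1 terminated rather than a vector):

    #include <vector>

    bool exceedsAnyLimit(unsigned Weight,
                         const std::vector<unsigned> &ClassSets,
                         const std::vector<unsigned> &BlockPressure,
                         const std::vector<unsigned> &SetLimit) {
      for (unsigned PS : ClassSets)
        if (Weight + BlockPressure[PS] >= SetLimit[PS])
          return true;  // sinking would push this set to its limit
      return false;
    }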
/// Get the sorted sequence of successors for this MachineBasicBlock, possibly
@@ -613,8 +745,7 @@ MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
if (Succs != AllSuccessors.end())
return Succs->second;
- SmallVector<MachineBasicBlock *, 4> AllSuccs(MBB->succ_begin(),
- MBB->succ_end());
+ SmallVector<MachineBasicBlock *, 4> AllSuccs(MBB->successors());
// Handle cases where sinking can happen but where the sink point isn't a
// successor. For example:
@@ -876,6 +1007,97 @@ static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
}
}
+/// hasStoreBetween - check whether there is a store between the
+/// straight-line blocks From and To.
+bool MachineSinking::hasStoreBetween(MachineBasicBlock *From,
+ MachineBasicBlock *To, MachineInstr &MI) {
+ // Make sure From and To are in straight line which means From dominates To
+ // and To post dominates From.
+ if (!DT->dominates(From, To) || !PDT->dominates(To, From))
+ return true;
+
+ auto BlockPair = std::make_pair(From, To);
+
+ // Has this block pair been queried before, with a definite cached
+ // result?
+ if (HasStoreCache.find(BlockPair) != HasStoreCache.end())
+ return HasStoreCache[BlockPair];
+
+ if (StoreInstrCache.find(BlockPair) != StoreInstrCache.end())
+ return llvm::any_of(StoreInstrCache[BlockPair], [&](MachineInstr *I) {
+ return I->mayAlias(AA, MI, false);
+ });
+
+ bool SawStore = false;
+ bool HasAliasedStore = false;
+ DenseSet<MachineBasicBlock *> HandledBlocks;
+ DenseSet<MachineBasicBlock *> HandledDomBlocks;
+ // Go through all the blocks reachable from From.
+ for (MachineBasicBlock *BB : depth_first(From)) {
+ // We insert the instruction at the start of block To, so there is no need
+ // to worry about stores inside To.
+ // Stores in block From have already been considered on entry to
+ // SinkInstruction.
+ if (BB == To || BB == From)
+ continue;
+
+ // We already handled this BB in a previous iteration.
+ if (HandledBlocks.count(BB))
+ continue;
+
+ HandledBlocks.insert(BB);
+ // If To post-dominates BB, then BB must lie on a path from block From.
+ if (PDT->dominates(To, BB)) {
+ if (!HandledDomBlocks.count(BB))
+ HandledDomBlocks.insert(BB);
+
+ // If this BB is too big, or the number of blocks on the straight line
+ // between From and To is too big, stop searching to save compile time.
+ if (BB->size() > SinkLoadInstsPerBlockThreshold ||
+ HandledDomBlocks.size() > SinkLoadBlocksThreshold) {
+ for (auto *DomBB : HandledDomBlocks) {
+ if (DomBB != BB && DT->dominates(DomBB, BB))
+ HasStoreCache[std::make_pair(DomBB, To)] = true;
else if (DomBB != BB && DT->dominates(BB, DomBB))
+ HasStoreCache[std::make_pair(From, DomBB)] = true;
+ }
+ HasStoreCache[BlockPair] = true;
+ return true;
+ }
+
+ for (MachineInstr &I : *BB) {
+ // Treat as alias conservatively for a call or an ordered memory
+ // operation.
+ if (I.isCall() || I.hasOrderedMemoryRef()) {
+ for (auto *DomBB : HandledDomBlocks) {
+ if (DomBB != BB && DT->dominates(DomBB, BB))
+ HasStoreCache[std::make_pair(DomBB, To)] = true;
else if (DomBB != BB && DT->dominates(BB, DomBB))
+ HasStoreCache[std::make_pair(From, DomBB)] = true;
+ }
+ HasStoreCache[BlockPair] = true;
+ return true;
+ }
+
+ if (I.mayStore()) {
+ SawStore = true;
+ // We still have a chance to sink MI if none of the stores in between
+ // alias MI.
+ // Cache all store instructions so that we don't need to walk all the
+ // blocks reachable from From again for the next load instruction.
+ if (I.mayAlias(AA, MI, false))
+ HasAliasedStore = true;
+ StoreInstrCache[BlockPair].push_back(&I);
+ }
+ }
+ }
+ }
+ // If there is no store at all, cache the result.
+ if (!SawStore)
+ HasStoreCache[BlockPair] = false;
+ return HasAliasedStore;
+}
+
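The function layers two caches: a definite boolean verdict per (From, To) pair, and, when the answer depends on the particular load, the list of stores seen on the paths so that only the alias queries rerun. A compressed sketch of that lookup order (hypothetical types; the real code also seeds sub-path entries when it bails out early):

    #include <map>
    #include <utility>
    #include <vector>

    struct Block {};
    struct Instr {
      bool MayAlias = false;
      bool aliases(const Instr &) const { return MayAlias; }
    };
    using BlockPair = std::pair<Block *, Block *>;

    std::map<BlockPair, bool> HasStoreCache;              // definite verdicts
    std::map<BlockPair, std::vector<Instr *>> StoreCache; // per-load rerun

    bool hasAliasedStore(Block *From, Block *To, const Instr &Load) {
      BlockPair Key{From, To};
      if (auto It = HasStoreCache.find(Key); It != HasStoreCache.end())
        return It->second;           // a call/ordered op, or no stores at all
      if (auto It = StoreCache.find(Key); It != StoreCache.end()) {
        for (Instr *St : It->second) // stores exist: only alias checks rerun
          if (St->aliases(Load))
            return true;
        return false;
      }
      // Otherwise walk the blocks reachable from From and fill both caches;
      // elided here, so answer conservatively in this sketch.
      return true;
    }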
/// SinkInstruction - Determine whether it is safe to sink the specified machine
/// instruction out of its current block into a successor.
bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
@@ -936,8 +1158,9 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
// We cannot sink a load across a critical edge - there may be stores in
// other code paths.
bool TryBreak = false;
- bool store = true;
- if (!MI.isSafeToMove(AA, store)) {
+ bool Store =
+ MI.mayLoad() ? hasStoreBetween(ParentBlock, SuccToSinkTo, MI) : true;
+ if (!MI.isSafeToMove(AA, Store)) {
LLVM_DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
TryBreak = true;
}
@@ -1268,9 +1491,9 @@ static bool hasRegisterDependency(MachineInstr *MI,
return HasRegDependency;
}
-static SmallSet<unsigned, 4> getRegUnits(unsigned Reg,
- const TargetRegisterInfo *TRI) {
- SmallSet<unsigned, 4> RegUnits;
+static SmallSet<MCRegister, 4> getRegUnits(MCRegister Reg,
+ const TargetRegisterInfo *TRI) {
+ SmallSet<MCRegister, 4> RegUnits;
for (auto RI = MCRegUnitIterator(Reg, TRI); RI.isValid(); ++RI)
RegUnits.insert(*RI);
return RegUnits;
@@ -1320,8 +1543,8 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
continue;
// Record debug use of each reg unit.
- SmallSet<unsigned, 4> Units = getRegUnits(MO.getReg(), TRI);
- for (unsigned Reg : Units)
+ SmallSet<MCRegister, 4> Units = getRegUnits(MO.getReg(), TRI);
+ for (MCRegister Reg : Units)
SeenDbgInstrs[Reg].push_back(MI);
}
continue;
@@ -1365,18 +1588,17 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
// recorded which reg units that DBG_VALUEs read, if this instruction
// writes any of those units then the corresponding DBG_VALUEs must sink.
SetVector<MachineInstr *> DbgValsToSinkSet;
- SmallVector<MachineInstr *, 4> DbgValsToSink;
for (auto &MO : MI->operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
- SmallSet<unsigned, 4> Units = getRegUnits(MO.getReg(), TRI);
- for (unsigned Reg : Units)
+ SmallSet<MCRegister, 4> Units = getRegUnits(MO.getReg(), TRI);
+ for (MCRegister Reg : Units)
for (auto *MI : SeenDbgInstrs.lookup(Reg))
DbgValsToSinkSet.insert(MI);
}
- DbgValsToSink.insert(DbgValsToSink.begin(), DbgValsToSinkSet.begin(),
- DbgValsToSinkSet.end());
+ SmallVector<MachineInstr *, 4> DbgValsToSink(DbgValsToSinkSet.begin(),
+ DbgValsToSinkSet.end());
// Clear the kill flag if SrcReg is killed between MI and the end of the
// block.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp
new file mode 100644
index 000000000000..fb14f0a33209
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp
@@ -0,0 +1,194 @@
+//===- lib/CodeGen/MachineStableHash.cpp ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Stable hashing for MachineInstr and MachineOperand. Useful for getting a
+// hash across runs, modules, etc.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineStableHash.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/CodeGen/MIRFormatter.h"
+#include "llvm/CodeGen/MIRPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/StableHashing.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define DEBUG_TYPE "machine-stable-hash"
+
+using namespace llvm;
+
+STATISTIC(StableHashBailingMachineBasicBlock,
+ "Number of encountered unsupported MachineOperands that were "
+ "MachineBasicBlocks while computing stable hashes");
+STATISTIC(StableHashBailingConstantPoolIndex,
+ "Number of encountered unsupported MachineOperands that were "
+ "ConstantPoolIndex while computing stable hashes");
+STATISTIC(StableHashBailingTargetIndexNoName,
+ "Number of encountered unsupported MachineOperands that were "
+ "TargetIndex with no name");
+STATISTIC(StableHashBailingGlobalAddress,
+ "Number of encountered unsupported MachineOperands that were "
+ "GlobalAddress while computing stable hashes");
+STATISTIC(StableHashBailingBlockAddress,
+ "Number of encountered unsupported MachineOperands that were "
+ "BlockAddress while computing stable hashes");
+STATISTIC(StableHashBailingMetadataUnsupported,
+ "Number of encountered unsupported MachineOperands that were "
+ "Metadata of an unsupported kind while computing stable hashes");
+
+stable_hash llvm::stableHashValue(const MachineOperand &MO) {
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ if (Register::isVirtualRegister(MO.getReg())) {
+ const MachineRegisterInfo &MRI = MO.getParent()->getMF()->getRegInfo();
+ return MRI.getVRegDef(MO.getReg())->getOpcode();
+ }
+
+ // Register operands don't have target flags.
+ return stable_hash_combine(MO.getType(), MO.getReg(), MO.getSubReg(),
+ MO.isDef());
+ case MachineOperand::MO_Immediate:
+ return stable_hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm());
+ case MachineOperand::MO_CImmediate:
+ case MachineOperand::MO_FPImmediate: {
+ auto Val = MO.isCImm() ? MO.getCImm()->getValue()
+ : MO.getFPImm()->getValueAPF().bitcastToAPInt();
+ auto ValHash =
+ stable_hash_combine_array(Val.getRawData(), Val.getNumWords());
+ return hash_combine(MO.getType(), MO.getTargetFlags(), ValHash);
+ }
+
+ case MachineOperand::MO_MachineBasicBlock:
+ StableHashBailingMachineBasicBlock++;
+ return 0;
+ case MachineOperand::MO_ConstantPoolIndex:
+ StableHashBailingConstantPoolIndex++;
+ return 0;
+ case MachineOperand::MO_BlockAddress:
+ StableHashBailingBlockAddress++;
+ return 0;
+ case MachineOperand::MO_Metadata:
+ StableHashBailingMetadataUnsupported++;
+ return 0;
+ case MachineOperand::MO_GlobalAddress:
+ StableHashBailingGlobalAddress++;
+ return 0;
+ case MachineOperand::MO_TargetIndex: {
+ if (const char *Name = MO.getTargetIndexName())
+ return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
+ stable_hash_combine_string(Name),
+ MO.getOffset());
+ StableHashBailingTargetIndexNoName++;
+ return 0;
+ }
+
+ case MachineOperand::MO_FrameIndex:
+ case MachineOperand::MO_JumpTableIndex:
+ return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
+ MO.getIndex());
+
+ case MachineOperand::MO_ExternalSymbol:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getOffset(),
+ stable_hash_combine_string(MO.getSymbolName()));
+
+ case MachineOperand::MO_RegisterMask:
+ case MachineOperand::MO_RegisterLiveOut:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask());
+
+ case MachineOperand::MO_ShuffleMask: {
+ std::vector<llvm::stable_hash> ShuffleMaskHashes;
+
+ llvm::transform(
+ MO.getShuffleMask(), std::back_inserter(ShuffleMaskHashes),
+ [](int S) -> llvm::stable_hash { return llvm::stable_hash(S); });
+
+ return hash_combine(MO.getType(), MO.getTargetFlags(),
+ stable_hash_combine_array(ShuffleMaskHashes.data(),
+ ShuffleMaskHashes.size()));
+ }
+ case MachineOperand::MO_MCSymbol: {
+ auto SymbolName = MO.getMCSymbol()->getName();
+ return hash_combine(MO.getType(), MO.getTargetFlags(),
+ stable_hash_combine_string(SymbolName));
+ }
+ case MachineOperand::MO_CFIIndex:
+ return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
+ MO.getCFIIndex());
+ case MachineOperand::MO_IntrinsicID:
+ return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
+ MO.getIntrinsicID());
+ case MachineOperand::MO_Predicate:
+ return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
+ MO.getPredicate());
+ }
+ llvm_unreachable("Invalid machine operand type");
+}
+
+/// A stable hash value for machine instructions.
+/// Returns 0 if no stable hash could be computed.
+/// The hashing and equality testing functions ignore definitions so this is
+/// useful for CSE, etc.
+stable_hash llvm::stableHashValue(const MachineInstr &MI, bool HashVRegs,
+ bool HashConstantPoolIndices,
+ bool HashMemOperands) {
+ // Build up a buffer of hash code components.
+ SmallVector<stable_hash, 16> HashComponents;
+ HashComponents.reserve(MI.getNumOperands() + MI.getNumMemOperands() + 2);
+ HashComponents.push_back(MI.getOpcode());
+ HashComponents.push_back(MI.getFlags());
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!HashVRegs && MO.isReg() && MO.isDef() &&
+ Register::isVirtualRegister(MO.getReg()))
+ continue; // Skip virtual register defs.
+
+ if (MO.isCPI()) {
+ HashComponents.push_back(stable_hash_combine(
+ MO.getType(), MO.getTargetFlags(), MO.getIndex()));
+ continue;
+ }
+
+ stable_hash StableHash = stableHashValue(MO);
+ if (!StableHash)
+ return 0;
+ HashComponents.push_back(StableHash);
+ }
+
+ for (const auto *Op : MI.memoperands()) {
+ if (!HashMemOperands)
+ break;
+ HashComponents.push_back(static_cast<unsigned>(Op->getSize()));
+ HashComponents.push_back(static_cast<unsigned>(Op->getFlags()));
+ HashComponents.push_back(static_cast<unsigned>(Op->getOffset()));
+ HashComponents.push_back(static_cast<unsigned>(Op->getOrdering()));
+ HashComponents.push_back(static_cast<unsigned>(Op->getAddrSpace()));
+ HashComponents.push_back(static_cast<unsigned>(Op->getSyncScopeID()));
+ HashComponents.push_back(static_cast<unsigned>(Op->getBaseAlign().value()));
+ HashComponents.push_back(static_cast<unsigned>(Op->getFailureOrdering()));
+ }
+
+ return stable_hash_combine_range(HashComponents.begin(),
+ HashComponents.end());
+}
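The point of the stable_hash machinery (not reproduced here) is that the result is a pure function of the listed components, so it reproduces across processes, unlike pointer- or seed-based hashing. A tiny FNV-1a stand-in conveys the same contract:

    #include <cstdint>
    #include <initializer_list>
    #include <iostream>

    using stable_hash = std::uint64_t;

    // Pure function of its inputs: same components, same hash, any run.
    stable_hash combine(std::initializer_list<stable_hash> Components) {
      stable_hash H = 0xcbf29ce484222325ULL;  // FNV-1a offset basis
      for (stable_hash C : Components)
        for (int I = 0; I < 8; ++I) {
          H ^= (C >> (I * 8)) & 0xff;
          H *= 0x100000001b3ULL;              // FNV-1a prime
        }
      return H;
    }

    int main() {
      // e.g. hash an opcode, flags and an immediate, much as the code
      // above combines real operand components.
      std::cout << combine({34, 0, 7}) << '\n';
    }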
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index e6b51b7e1e56..8df23b781ffd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -701,17 +701,15 @@ static void updatePhysDepsDownwards(const MachineInstr *UseMI,
SmallVectorImpl<DataDep> &Deps,
SparseSet<LiveRegUnit> &RegUnits,
const TargetRegisterInfo *TRI) {
- SmallVector<unsigned, 8> Kills;
+ SmallVector<MCRegister, 8> Kills;
SmallVector<unsigned, 8> LiveDefOps;
for (MachineInstr::const_mop_iterator MI = UseMI->operands_begin(),
ME = UseMI->operands_end(); MI != ME; ++MI) {
const MachineOperand &MO = *MI;
- if (!MO.isReg())
- continue;
- Register Reg = MO.getReg();
- if (!Register::isPhysicalRegister(Reg))
+ if (!MO.isReg() || !MO.getReg().isPhysical())
continue;
+ MCRegister Reg = MO.getReg().asMCReg();
// Track live defs and kills for updating RegUnits.
if (MO.isDef()) {
if (MO.isDead())
@@ -734,13 +732,14 @@ static void updatePhysDepsDownwards(const MachineInstr *UseMI,
// Update RegUnits to reflect live registers after UseMI.
// First kills.
- for (unsigned Kill : Kills)
+ for (MCRegister Kill : Kills)
for (MCRegUnitIterator Units(Kill, TRI); Units.isValid(); ++Units)
RegUnits.erase(*Units);
// Second, live defs.
for (unsigned DefOp : LiveDefOps) {
- for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg(), TRI);
+ for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg().asMCReg(),
+ TRI);
Units.isValid(); ++Units) {
LiveRegUnit &LRU = RegUnits[*Units];
LRU.MI = UseMI;
@@ -766,7 +765,7 @@ computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
assert(TBI.HasValidInstrHeights && "Missing height info");
unsigned MaxLen = 0;
for (const LiveInReg &LIR : TBI.LiveIns) {
- if (!Register::isVirtualRegister(LIR.Reg))
+ if (!LIR.Reg.isVirtual())
continue;
const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
// Ignore dependencies outside the current trace.
@@ -912,7 +911,8 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
continue;
// This is a def of Reg. Remove corresponding entries from RegUnits, and
// update MI Height to consider the physreg dependencies.
- for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
+ ++Units) {
SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
if (I == RegUnits.end())
continue;
@@ -930,15 +930,15 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
}
// Now we know the height of MI. Update any regunits read.
- for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) {
- Register Reg = MI.getOperand(ReadOps[i]).getReg();
+ for (size_t I = 0, E = ReadOps.size(); I != E; ++I) {
+ MCRegister Reg = MI.getOperand(ReadOps[I]).getReg().asMCReg();
for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
LiveRegUnit &LRU = RegUnits[*Units];
// Set the height to the highest reader of the unit.
if (LRU.Cycle <= Height && LRU.MI != &MI) {
LRU.Cycle = Height;
LRU.MI = &MI;
- LRU.Op = ReadOps[i];
+ LRU.Op = ReadOps[I];
}
}
}
@@ -979,7 +979,7 @@ void MachineTraceMetrics::Ensemble::
addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
ArrayRef<const MachineBasicBlock*> Trace) {
assert(!Trace.empty() && "Trace should contain at least one block");
- unsigned Reg = DefMI->getOperand(DefOp).getReg();
+ Register Reg = DefMI->getOperand(DefOp).getReg();
assert(Register::isVirtualRegister(Reg));
const MachineBasicBlock *DefMBB = DefMI->getParent();
@@ -1027,7 +1027,7 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
if (MBB) {
TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
for (LiveInReg &LI : TBI.LiveIns) {
- if (Register::isVirtualRegister(LI.Reg)) {
+ if (LI.Reg.isVirtual()) {
// For virtual registers, the def latency is included.
unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
if (Height < LI.Height)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
index c1a2c4e0bc6e..0f6d9b888f47 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -86,7 +86,7 @@ namespace {
struct MachineVerifier {
MachineVerifier(Pass *pass, const char *b) : PASS(pass), Banner(b) {}
- unsigned verify(MachineFunction &MF);
+ unsigned verify(const MachineFunction &MF);
Pass *const PASS;
const char *Banner;
@@ -102,10 +102,10 @@ namespace {
bool isFunctionRegBankSelected;
bool isFunctionSelected;
- using RegVector = SmallVector<unsigned, 16>;
+ using RegVector = SmallVector<Register, 16>;
using RegMaskVector = SmallVector<const uint32_t *, 4>;
- using RegSet = DenseSet<unsigned>;
- using RegMap = DenseMap<unsigned, const MachineInstr *>;
+ using RegSet = DenseSet<Register>;
+ using RegMap = DenseMap<Register, const MachineInstr *>;
using BlockSet = SmallPtrSet<const MachineBasicBlock *, 8>;
const MachineInstr *FirstNonPHI;
@@ -120,11 +120,10 @@ namespace {
SlotIndex lastIndex;
// Add Reg and any sub-registers to RV
- void addRegWithSubRegs(RegVector &RV, unsigned Reg) {
+ void addRegWithSubRegs(RegVector &RV, Register Reg) {
RV.push_back(Reg);
- if (Register::isPhysicalRegister(Reg))
- for (const MCPhysReg &SubReg : TRI->subregs(Reg))
- RV.push_back(SubReg);
+ if (Reg.isPhysical())
+ append_range(RV, TRI->subregs(Reg.asMCReg()));
}
struct BBInfo {
@@ -132,7 +131,8 @@ namespace {
bool reachable = false;
// Vregs that must be live in because they are used without being
- // defined. Map value is the user.
+ // defined. Map value is the user. vregsLiveIn doesn't include regs
+ // that are only used by PHI nodes.
RegMap vregsLiveIn;
// Regs killed in MBB. They may be defined again, and will then be in both
@@ -158,8 +158,8 @@ namespace {
// Add register to vregsRequired if it belongs there. Return true if
// anything changed.
- bool addRequired(unsigned Reg) {
- if (!Register::isVirtualRegister(Reg))
+ bool addRequired(Register Reg) {
+ if (!Reg.isVirtual())
return false;
if (regsLiveOut.count(Reg))
return false;
@@ -169,7 +169,7 @@ namespace {
// Same for a full set.
bool addRequired(const RegSet &RS) {
bool Changed = false;
- for (unsigned Reg : RS)
+ for (Register Reg : RS)
Changed |= addRequired(Reg);
return Changed;
}
@@ -183,7 +183,7 @@ namespace {
}
// Live-out registers are either in regsLiveOut or vregsPassed.
- bool isLiveOut(unsigned Reg) const {
+ bool isLiveOut(Register Reg) const {
return regsLiveOut.count(Reg) || vregsPassed.count(Reg);
}
};
@@ -191,13 +191,13 @@ namespace {
// Extra register info per MBB.
DenseMap<const MachineBasicBlock*, BBInfo> MBBInfoMap;
- bool isReserved(unsigned Reg) {
- return Reg < regsReserved.size() && regsReserved.test(Reg);
+ bool isReserved(Register Reg) {
+ return Reg.id() < regsReserved.size() && regsReserved.test(Reg.id());
}
- bool isAllocatable(unsigned Reg) const {
- return Reg < TRI->getNumRegs() && TRI->isInAllocatableClass(Reg) &&
- !regsReserved.test(Reg);
+ bool isAllocatable(Register Reg) const {
+ return Reg.id() < TRI->getNumRegs() && TRI->isInAllocatableClass(Reg) &&
+ !regsReserved.test(Reg.id());
}
// Analysis information if available
@@ -225,7 +225,7 @@ namespace {
LLT MOVRegType = LLT{});
void report_context(const LiveInterval &LI) const;
- void report_context(const LiveRange &LR, unsigned VRegUnit,
+ void report_context(const LiveRange &LR, Register VRegUnit,
LaneBitmask LaneMask) const;
void report_context(const LiveRange::Segment &S) const;
void report_context(const VNInfo &VNI) const;
@@ -233,18 +233,19 @@ namespace {
void report_context(MCPhysReg PhysReg) const;
void report_context_liverange(const LiveRange &LR) const;
void report_context_lanemask(LaneBitmask LaneMask) const;
- void report_context_vreg(unsigned VReg) const;
- void report_context_vreg_regunit(unsigned VRegOrUnit) const;
+ void report_context_vreg(Register VReg) const;
+ void report_context_vreg_regunit(Register VRegOrUnit) const;
void verifyInlineAsm(const MachineInstr *MI);
void checkLiveness(const MachineOperand *MO, unsigned MONum);
void checkLivenessAtUse(const MachineOperand *MO, unsigned MONum,
- SlotIndex UseIdx, const LiveRange &LR, unsigned VRegOrUnit,
+ SlotIndex UseIdx, const LiveRange &LR,
+ Register VRegOrUnit,
LaneBitmask LaneMask = LaneBitmask::getNone());
void checkLivenessAtDef(const MachineOperand *MO, unsigned MONum,
- SlotIndex DefIdx, const LiveRange &LR, unsigned VRegOrUnit,
- bool SubRangeCheck = false,
+ SlotIndex DefIdx, const LiveRange &LR,
+ Register VRegOrUnit, bool SubRangeCheck = false,
LaneBitmask LaneMask = LaneBitmask::getNone());
void markReachable(const MachineBasicBlock *MBB);
@@ -255,12 +256,12 @@ namespace {
void verifyLiveVariables();
void verifyLiveIntervals();
void verifyLiveInterval(const LiveInterval&);
- void verifyLiveRangeValue(const LiveRange&, const VNInfo*, unsigned,
+ void verifyLiveRangeValue(const LiveRange &, const VNInfo *, Register,
LaneBitmask);
- void verifyLiveRangeSegment(const LiveRange&,
- const LiveRange::const_iterator I, unsigned,
+ void verifyLiveRangeSegment(const LiveRange &,
+ const LiveRange::const_iterator I, Register,
LaneBitmask);
- void verifyLiveRange(const LiveRange&, unsigned,
+ void verifyLiveRange(const LiveRange &, Register,
LaneBitmask LaneMask = LaneBitmask::getNone());
void verifyStackFrame();
@@ -303,6 +304,19 @@ FunctionPass *llvm::createMachineVerifierPass(const std::string &Banner) {
return new MachineVerifierPass(Banner);
}
+void llvm::verifyMachineFunction(MachineFunctionAnalysisManager *,
+ const std::string &Banner,
+ const MachineFunction &MF) {
+ // TODO: Use MFAM after porting below analyses.
+ // LiveVariables *LiveVars;
+ // LiveIntervals *LiveInts;
+ // LiveStacks *LiveStks;
+ // SlotIndexes *Indexes;
+ unsigned FoundErrors = MachineVerifier(nullptr, Banner.c_str()).verify(MF);
+ if (FoundErrors)
+ report_fatal_error("Found " + Twine(FoundErrors) + " machine code errors.");
+}
+
bool MachineFunction::verify(Pass *p, const char *Banner, bool AbortOnErrors)
const {
MachineFunction &MF = const_cast<MachineFunction&>(*this);
@@ -335,7 +349,7 @@ void MachineVerifier::verifyProperties(const MachineFunction &MF) {
report("Function has NoVRegs property but there are VReg operands", &MF);
}
-unsigned MachineVerifier::verify(MachineFunction &MF) {
+unsigned MachineVerifier::verify(const MachineFunction &MF) {
foundErrors = 0;
this->MF = &MF;
@@ -474,7 +488,7 @@ void MachineVerifier::report(const char *msg, const MachineInstr *MI) {
errs() << "- instruction: ";
if (Indexes && Indexes->hasIndex(*MI))
errs() << Indexes->getInstructionIndex(*MI) << '\t';
- MI->print(errs(), /*SkipOpers=*/true);
+ MI->print(errs(), /*IsStandalone=*/true);
}
void MachineVerifier::report(const char *msg, const MachineOperand *MO,
@@ -494,7 +508,7 @@ void MachineVerifier::report_context(const LiveInterval &LI) const {
errs() << "- interval: " << LI << '\n';
}
-void MachineVerifier::report_context(const LiveRange &LR, unsigned VRegUnit,
+void MachineVerifier::report_context(const LiveRange &LR, Register VRegUnit,
LaneBitmask LaneMask) const {
report_context_liverange(LR);
report_context_vreg_regunit(VRegUnit);
@@ -518,11 +532,11 @@ void MachineVerifier::report_context(MCPhysReg PReg) const {
errs() << "- p. register: " << printReg(PReg, TRI) << '\n';
}
-void MachineVerifier::report_context_vreg(unsigned VReg) const {
+void MachineVerifier::report_context_vreg(Register VReg) const {
errs() << "- v. register: " << printReg(VReg, TRI) << '\n';
}
-void MachineVerifier::report_context_vreg_regunit(unsigned VRegOrUnit) const {
+void MachineVerifier::report_context_vreg_regunit(Register VRegOrUnit) const {
if (Register::isVirtualRegister(VRegOrUnit)) {
report_context_vreg(VRegOrUnit);
} else {
@@ -776,9 +790,7 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) {
}
// Ensure non-terminators don't follow terminators.
- // Ignore predicated terminators formed by if conversion.
- // FIXME: If conversion shouldn't need to violate this rule.
- if (MI->isTerminator() && !TII->isPredicated(*MI)) {
+ if (MI->isTerminator()) {
if (!FirstTerminator)
FirstTerminator = MI;
} else if (FirstTerminator) {
@@ -992,16 +1004,15 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
}
case TargetOpcode::G_PHI: {
LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
- if (!DstTy.isValid() ||
- !std::all_of(MI->operands_begin() + 1, MI->operands_end(),
- [this, &DstTy](const MachineOperand &MO) {
- if (!MO.isReg())
- return true;
- LLT Ty = MRI->getType(MO.getReg());
- if (!Ty.isValid() || (Ty != DstTy))
- return false;
- return true;
- }))
+ if (!DstTy.isValid() || !all_of(drop_begin(MI->operands()),
+ [this, &DstTy](const MachineOperand &MO) {
+ if (!MO.isReg())
+ return true;
+ LLT Ty = MRI->getType(MO.getReg());
+ if (!Ty.isValid() || (Ty != DstTy))
+ return false;
+ return true;
+ }))
report("Generic Instruction G_PHI has operands with incompatible/missing "
"types",
MI);
@@ -1343,20 +1354,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
break;
}
}
- switch (IntrID) {
- case Intrinsic::memcpy:
- if (MI->getNumOperands() != 5)
- report("Expected memcpy intrinsic to have 5 operands", MI);
- break;
- case Intrinsic::memmove:
- if (MI->getNumOperands() != 5)
- report("Expected memmove intrinsic to have 5 operands", MI);
- break;
- case Intrinsic::memset:
- if (MI->getNumOperands() != 5)
- report("Expected memset intrinsic to have 5 operands", MI);
- break;
- }
+
break;
}
case TargetOpcode::G_SEXT_INREG: {
@@ -1434,6 +1432,95 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
}
break;
}
+ case TargetOpcode::G_MEMCPY:
+ case TargetOpcode::G_MEMMOVE: {
+ ArrayRef<MachineMemOperand *> MMOs = MI->memoperands();
+ if (MMOs.size() != 2) {
+ report("memcpy/memmove must have 2 memory operands", MI);
+ break;
+ }
+
+ if ((!MMOs[0]->isStore() || MMOs[0]->isLoad()) ||
+ (MMOs[1]->isStore() || !MMOs[1]->isLoad())) {
+ report("wrong memory operand types", MI);
+ break;
+ }
+
+ if (MMOs[0]->getSize() != MMOs[1]->getSize())
+ report("inconsistent memory operand sizes", MI);
+
+ LLT DstPtrTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcPtrTy = MRI->getType(MI->getOperand(1).getReg());
+
+ if (!DstPtrTy.isPointer() || !SrcPtrTy.isPointer()) {
+ report("memory instruction operand must be a pointer", MI);
+ break;
+ }
+
+ if (DstPtrTy.getAddressSpace() != MMOs[0]->getAddrSpace())
+ report("inconsistent store address space", MI);
+ if (SrcPtrTy.getAddressSpace() != MMOs[1]->getAddrSpace())
+ report("inconsistent load address space", MI);
+
+ break;
+ }
+ case TargetOpcode::G_MEMSET: {
+ ArrayRef<MachineMemOperand *> MMOs = MI->memoperands();
+ if (MMOs.size() != 1) {
+ report("memset must have 1 memory operand", MI);
+ break;
+ }
+
+ if ((!MMOs[0]->isStore() || MMOs[0]->isLoad())) {
+ report("memset memory operand must be a store", MI);
+ break;
+ }
+
+ LLT DstPtrTy = MRI->getType(MI->getOperand(0).getReg());
+ if (!DstPtrTy.isPointer()) {
+ report("memset operand must be a pointer", MI);
+ break;
+ }
+
+ if (DstPtrTy.getAddressSpace() != MMOs[0]->getAddrSpace())
+ report("inconsistent memset address space", MI);
+
+ break;
+ }
+ case TargetOpcode::G_VECREDUCE_SEQ_FADD:
+ case TargetOpcode::G_VECREDUCE_SEQ_FMUL: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT Src1Ty = MRI->getType(MI->getOperand(1).getReg());
+ LLT Src2Ty = MRI->getType(MI->getOperand(2).getReg());
+ if (!DstTy.isScalar())
+ report("Vector reduction requires a scalar destination type", MI);
+ if (!Src1Ty.isScalar())
+ report("Sequential FADD/FMUL vector reduction requires a scalar 1st operand", MI);
+ if (!Src2Ty.isVector())
+ report("Sequential FADD/FMUL vector reduction must have a vector 2nd operand", MI);
+ break;
+ }
+ case TargetOpcode::G_VECREDUCE_FADD:
+ case TargetOpcode::G_VECREDUCE_FMUL:
+ case TargetOpcode::G_VECREDUCE_FMAX:
+ case TargetOpcode::G_VECREDUCE_FMIN:
+ case TargetOpcode::G_VECREDUCE_ADD:
+ case TargetOpcode::G_VECREDUCE_MUL:
+ case TargetOpcode::G_VECREDUCE_AND:
+ case TargetOpcode::G_VECREDUCE_OR:
+ case TargetOpcode::G_VECREDUCE_XOR:
+ case TargetOpcode::G_VECREDUCE_SMAX:
+ case TargetOpcode::G_VECREDUCE_SMIN:
+ case TargetOpcode::G_VECREDUCE_UMAX:
+ case TargetOpcode::G_VECREDUCE_UMIN: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+ if (!DstTy.isScalar())
+ report("Vector reduction requires a scalar destination type", MI);
+ if (!SrcTy.isVector())
+ report("Vector reduction requires vector source=", MI);
+ break;
+ }
default:
break;
}
@@ -1461,6 +1548,16 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
if (MI->isInlineAsm())
verifyInlineAsm(MI);
+ // Check that unspillable terminators define a reg and have at most one use.
+ if (TII->isUnspillableTerminator(MI)) {
+ if (!MI->getOperand(0).isReg() || !MI->getOperand(0).isDef())
+ report("Unspillable Terminator does not define a reg", MI);
+ Register Def = MI->getOperand(0).getReg();
+ if (Def.isVirtual() &&
+ std::distance(MRI->use_nodbg_begin(Def), MRI->use_nodbg_end()) > 1)
+ report("Unspillable Terminator expected to have at most one use!", MI);
+ }
+
// A fully-formed DBG_VALUE must have a location. Ignore partially formed
// DBG_VALUEs: these are convenient to use in tests, but should never get
// generated.
@@ -1468,6 +1565,11 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
if (!MI->getDebugLoc())
report("Missing DebugLoc for debug instruction", MI);
+ // Meta instructions should never be the subject of debug value tracking;
+ // they don't create a value in the output program at all.
+ if (MI->isMetaInstruction() && MI->peekDebugInstrNum())
+ report("Metadata instruction should not have a value tracking number", MI);
+
// Check the MachineMemOperands for basic consistency.
for (MachineMemOperand *Op : MI->memoperands()) {
if (Op->isLoad() && !MI->mayLoad())
@@ -1543,6 +1645,10 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
}
auto VerifyStackMapConstant = [&](unsigned Offset) {
+ if (Offset >= MI->getNumOperands()) {
+ report("stack map constant to STATEPOINT is out of range!", MI);
+ return;
+ }
if (!MI->getOperand(Offset - 1).isImm() ||
MI->getOperand(Offset - 1).getImm() != StackMaps::ConstantOp ||
!MI->getOperand(Offset).isImm())
@@ -1551,6 +1657,25 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
VerifyStackMapConstant(SO.getCCIdx());
VerifyStackMapConstant(SO.getFlagsIdx());
VerifyStackMapConstant(SO.getNumDeoptArgsIdx());
+ VerifyStackMapConstant(SO.getNumGCPtrIdx());
+ VerifyStackMapConstant(SO.getNumAllocaIdx());
+ VerifyStackMapConstant(SO.getNumGcMapEntriesIdx());
+
+ // Verify that all explicit statepoint defs are tied to gc operands as
+ // they are expected to be a relocation of gc operands.
+ unsigned FirstGCPtrIdx = SO.getFirstGCPtrIdx();
+ unsigned LastGCPtrIdx = SO.getNumAllocaIdx() - 2;
+ for (unsigned Idx = 0; Idx < MI->getNumDefs(); Idx++) {
+ unsigned UseOpIdx;
+ if (!MI->isRegTiedToUseOperand(Idx, &UseOpIdx)) {
+ report("STATEPOINT defs expected to be tied", MI);
+ break;
+ }
+ if (UseOpIdx < FirstGCPtrIdx || UseOpIdx > LastGCPtrIdx) {
+ report("STATEPOINT def tied to non-gc operand", MI);
+ break;
+ }
+ }
// TODO: verify we have properly encoded deopt arguments
} break;
@@ -1865,8 +1990,10 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
}
void MachineVerifier::checkLivenessAtUse(const MachineOperand *MO,
- unsigned MONum, SlotIndex UseIdx, const LiveRange &LR, unsigned VRegOrUnit,
- LaneBitmask LaneMask) {
+ unsigned MONum, SlotIndex UseIdx,
+ const LiveRange &LR,
+ Register VRegOrUnit,
+ LaneBitmask LaneMask) {
LiveQueryResult LRQ = LR.Query(UseIdx);
// Check if we have a segment at the use, note however that we only need one
// live subregister range, the others may be dead.
@@ -1887,8 +2014,11 @@ void MachineVerifier::checkLivenessAtUse(const MachineOperand *MO,
}
void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO,
- unsigned MONum, SlotIndex DefIdx, const LiveRange &LR, unsigned VRegOrUnit,
- bool SubRangeCheck, LaneBitmask LaneMask) {
+ unsigned MONum, SlotIndex DefIdx,
+ const LiveRange &LR,
+ Register VRegOrUnit,
+ bool SubRangeCheck,
+ LaneBitmask LaneMask) {
if (const VNInfo *VNI = LR.getVNInfoAt(DefIdx)) {
assert(VNI && "NULL valno is not allowed");
if (VNI->def != DefIdx) {
@@ -1932,7 +2062,7 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO,
void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
const MachineInstr *MI = MO->getParent();
- const unsigned Reg = MO->getReg();
+ const Register Reg = MO->getReg();
// Both use and def operands can read a register.
if (MO->readsReg()) {
@@ -1950,8 +2080,9 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
if (LiveInts && !LiveInts->isNotInMIMap(*MI)) {
SlotIndex UseIdx = LiveInts->getInstructionIndex(*MI);
// Check the cached regunit intervals.
- if (Register::isPhysicalRegister(Reg) && !isReserved(Reg)) {
- for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+ if (Reg.isPhysical() && !isReserved(Reg)) {
+ for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
+ ++Units) {
if (MRI->isReservedRegUnit(*Units))
continue;
if (const LiveRange *LR = LiveInts->getCachedRegUnit(*Units))
@@ -2097,9 +2228,9 @@ void MachineVerifier::visitMachineBundleAfter(const MachineInstr *MI) {
// Kill any masked registers.
while (!regMasks.empty()) {
const uint32_t *Mask = regMasks.pop_back_val();
- for (unsigned Reg : regsLive)
- if (Register::isPhysicalRegister(Reg) &&
- MachineOperand::clobbersPhysReg(Mask, Reg))
+ for (Register Reg : regsLive)
+ if (Reg.isPhysical() &&
+ MachineOperand::clobbersPhysReg(Mask, Reg.asMCReg()))
regsDead.push_back(Reg);
}
set_subtract(regsLive, regsDead); regsDead.clear();
@@ -2132,7 +2263,7 @@ struct VRegFilter {
// Add elements to the filter itself. \pre Input set \p FromRegSet must have
// no duplicates. Both virtual and physical registers are fine.
template <typename RegSetT> void add(const RegSetT &FromRegSet) {
- SmallVector<unsigned, 0> VRegsBuffer;
+ SmallVector<Register, 0> VRegsBuffer;
filterAndAdd(FromRegSet, VRegsBuffer);
}
// Filter \p FromRegSet through the filter and append passed elements into \p
@@ -2140,13 +2271,13 @@ struct VRegFilter {
// \returns true if anything changed.
template <typename RegSetT>
bool filterAndAdd(const RegSetT &FromRegSet,
- SmallVectorImpl<unsigned> &ToVRegs) {
+ SmallVectorImpl<Register> &ToVRegs) {
unsigned SparseUniverse = Sparse.size();
unsigned NewSparseUniverse = SparseUniverse;
unsigned NewDenseSize = Dense.size();
size_t Begin = ToVRegs.size();
- for (unsigned Reg : FromRegSet) {
- if (!Register::isVirtualRegister(Reg))
+ for (Register Reg : FromRegSet) {
+ if (!Reg.isVirtual())
continue;
unsigned Index = Register::virtReg2Index(Reg);
if (Index < SparseUniverseMax) {
@@ -2170,7 +2301,7 @@ struct VRegFilter {
Sparse.resize(NewSparseUniverse);
Dense.reserve(NewDenseSize);
for (unsigned I = Begin; I < End; ++I) {
- unsigned Reg = ToVRegs[I];
+ Register Reg = ToVRegs[I];
unsigned Index = Register::virtReg2Index(Reg);
if (Index < SparseUniverseMax)
Sparse.set(Index);
@@ -2203,7 +2334,7 @@ private:
// universe). filter_b implicitly contains all physical registers at all times.
class FilteringVRegSet {
VRegFilter Filter;
- SmallVector<unsigned, 0> VRegs;
+ SmallVector<Register, 0> VRegs;
public:
// Set-up the filter_b. \pre Input register set \p RS must have no duplicates.
@@ -2229,63 +2360,28 @@ public:
// can pass through an MBB live, but may not be live every time. It is assumed
// that all vregsPassed sets are empty before the call.
void MachineVerifier::calcRegsPassed() {
- // This is a forward dataflow, doing it in RPO. A standard map serves as a
- // priority (sorting by RPO number) queue, deduplicating worklist, and an RPO
- // number to MBB mapping all at once.
- std::map<unsigned, const MachineBasicBlock *> RPOWorklist;
- DenseMap<const MachineBasicBlock *, unsigned> RPONumbers;
- if (MF->empty()) {
+ if (MF->empty())
// ReversePostOrderTraversal doesn't handle empty functions.
return;
- }
- std::vector<FilteringVRegSet> VRegsPassedSets(MF->size());
- for (const MachineBasicBlock *MBB :
- ReversePostOrderTraversal<const MachineFunction *>(MF)) {
- // Careful with the evaluation order, fetch next number before allocating.
- unsigned Number = RPONumbers.size();
- RPONumbers[MBB] = Number;
- // Set-up the transfer functions for all blocks.
- const BBInfo &MInfo = MBBInfoMap[MBB];
- VRegsPassedSets[Number].addToFilter(MInfo.regsKilled);
- VRegsPassedSets[Number].addToFilter(MInfo.regsLiveOut);
- }
- // First push live-out regs to successors' vregsPassed. Remember the MBBs that
- // have any vregsPassed.
- for (const MachineBasicBlock &MBB : *MF) {
- const BBInfo &MInfo = MBBInfoMap[&MBB];
- if (!MInfo.reachable)
- continue;
- for (const MachineBasicBlock *Succ : MBB.successors()) {
- unsigned SuccNumber = RPONumbers[Succ];
- FilteringVRegSet &SuccSet = VRegsPassedSets[SuccNumber];
- if (SuccSet.add(MInfo.regsLiveOut))
- RPOWorklist.emplace(SuccNumber, Succ);
- }
- }
- // Iteratively push vregsPassed to successors.
- while (!RPOWorklist.empty()) {
- auto Next = RPOWorklist.begin();
- const MachineBasicBlock *MBB = Next->second;
- RPOWorklist.erase(Next);
- FilteringVRegSet &MSet = VRegsPassedSets[RPONumbers[MBB]];
- for (const MachineBasicBlock *Succ : MBB->successors()) {
- if (Succ == MBB)
+ for (const MachineBasicBlock *MB :
+ ReversePostOrderTraversal<const MachineFunction *>(MF)) {
+ FilteringVRegSet VRegs;
+ BBInfo &Info = MBBInfoMap[MB];
+ assert(Info.reachable);
+
+ VRegs.addToFilter(Info.regsKilled);
+ VRegs.addToFilter(Info.regsLiveOut);
+ for (const MachineBasicBlock *Pred : MB->predecessors()) {
+ const BBInfo &PredInfo = MBBInfoMap[Pred];
+ if (!PredInfo.reachable)
continue;
- unsigned SuccNumber = RPONumbers[Succ];
- FilteringVRegSet &SuccSet = VRegsPassedSets[SuccNumber];
- if (SuccSet.add(MSet))
- RPOWorklist.emplace(SuccNumber, Succ);
+
+ VRegs.add(PredInfo.regsLiveOut);
+ VRegs.add(PredInfo.vregsPassed);
}
- }
- // Copy the results back to BBInfos.
- for (const MachineBasicBlock &MBB : *MF) {
- BBInfo &MInfo = MBBInfoMap[&MBB];
- if (!MInfo.reachable)
- continue;
- const FilteringVRegSet &MSet = VRegsPassedSets[RPONumbers[&MBB]];
- MInfo.vregsPassed.reserve(MSet.size());
- MInfo.vregsPassed.insert(MSet.begin(), MSet.end());
+ Info.vregsPassed.reserve(VRegs.size());
+ Info.vregsPassed.insert(VRegs.begin(), VRegs.end());
}
}
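The rewrite replaces the iterative worklist with a single reverse post-order sweep in which each block unions what its predecessors produce (regsLiveOut) or pass through (vregsPassed); RPO guarantees that forward-edge predecessors are final before their successors are visited. A toy version of the accumulation (hypothetical BB type):

    #include <set>
    #include <vector>

    struct BB {
      std::vector<BB *> Preds;
      std::set<int> LiveOut; // vregs this block defines and leaves live
      std::set<int> Passed;  // vregs that can traverse this block live
    };

    // Blocks must be supplied in reverse post-order so that forward-edge
    // predecessors are processed before their successors.
    void calcPassed(const std::vector<BB *> &RPO) {
      for (BB *B : RPO)
        for (BB *P : B->Preds) {
          B->Passed.insert(P->LiveOut.begin(), P->LiveOut.end());
          B->Passed.insert(P->Passed.begin(), P->Passed.end());
        }
    }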
@@ -2302,6 +2398,23 @@ void MachineVerifier::calcRegsRequired() {
if (PInfo.addRequired(MInfo.vregsLiveIn))
todo.insert(Pred);
}
+
+ // Handle the PHI node.
+ for (const MachineInstr &MI : MBB.phis()) {
+ for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
+ // Skip operands that are not registers or that do not read a register.
+ if (!MI.getOperand(i).isReg() || !MI.getOperand(i).readsReg())
+ continue;
+
+ // Get register and predecessor for one PHI edge.
+ Register Reg = MI.getOperand(i).getReg();
+ const MachineBasicBlock *Pred = MI.getOperand(i + 1).getMBB();
+
+ BBInfo &PInfo = MBBInfoMap[Pred];
+ if (PInfo.addRequired(Reg))
+ todo.insert(Pred);
+ }
+ }
}
// Iteratively push vregsRequired to predecessors. This will converge to the
@@ -2399,7 +2512,7 @@ void MachineVerifier::visitMachineFunctionAfter() {
// Check for killed virtual registers that should be live out.
for (const auto &MBB : *MF) {
BBInfo &MInfo = MBBInfoMap[&MBB];
- for (unsigned VReg : MInfo.vregsRequired)
+ for (Register VReg : MInfo.vregsRequired)
if (MInfo.regsKilled.count(VReg)) {
report("Virtual register killed in block, but needed live out.", &MBB);
errs() << "Virtual register " << printReg(VReg)
@@ -2409,7 +2522,7 @@ void MachineVerifier::visitMachineFunctionAfter() {
if (!MF->empty()) {
BBInfo &MInfo = MBBInfoMap[&MF->front()];
- for (unsigned VReg : MInfo.vregsRequired) {
+ for (Register VReg : MInfo.vregsRequired) {
report("Virtual register defs don't dominate all uses.", MF);
report_context_vreg(VReg);
}
@@ -2449,12 +2562,27 @@ void MachineVerifier::visitMachineFunctionAfter() {
for (auto CSInfo : MF->getCallSitesInfo())
if (!CSInfo.first->isCall())
report("Call site info referencing instruction that is not call", MF);
+
+ // If there's debug-info, check that we don't have any duplicate value
+ // tracking numbers.
+ if (MF->getFunction().getSubprogram()) {
+ DenseSet<unsigned> SeenNumbers;
+ for (auto &MBB : *MF) {
+ for (auto &MI : MBB) {
+ if (auto Num = MI.peekDebugInstrNum()) {
+ auto Result = SeenNumbers.insert((unsigned)Num);
+ if (!Result.second)
+ report("Instruction has a duplicated value tracking number", &MI);
+ }
+ }
+ }
+ }
}
void MachineVerifier::verifyLiveVariables() {
assert(LiveVars && "Don't call verifyLiveVariables without LiveVars");
- for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = Register::index2VirtReg(i);
+ for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
+ Register Reg = Register::index2VirtReg(I);
LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
for (const auto &MBB : *MF) {
BBInfo &MInfo = MBBInfoMap[&MBB];
@@ -2479,8 +2607,8 @@ void MachineVerifier::verifyLiveVariables() {
void MachineVerifier::verifyLiveIntervals() {
assert(LiveInts && "Don't call verifyLiveIntervals without LiveInts");
- for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = Register::index2VirtReg(i);
+ for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
+ Register Reg = Register::index2VirtReg(I);
// Spilling and splitting may leave unused registers around. Skip them.
if (MRI->reg_nodbg_empty(Reg))
@@ -2493,7 +2621,7 @@ void MachineVerifier::verifyLiveIntervals() {
}
const LiveInterval &LI = LiveInts->getInterval(Reg);
- assert(Reg == LI.reg && "Invalid reg to interval mapping");
+ assert(Reg == LI.reg() && "Invalid reg to interval mapping");
verifyLiveInterval(LI);
}
@@ -2504,7 +2632,7 @@ void MachineVerifier::verifyLiveIntervals() {
}
void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
- const VNInfo *VNI, unsigned Reg,
+ const VNInfo *VNI, Register Reg,
LaneBitmask LaneMask) {
if (VNI->isUnused())
return;
@@ -2597,8 +2725,8 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
const LiveRange::const_iterator I,
- unsigned Reg, LaneBitmask LaneMask)
-{
+ Register Reg,
+ LaneBitmask LaneMask) {
const LiveRange::Segment &S = *I;
const VNInfo *VNI = S.valno;
assert(VNI && "Live segment has no valno");
@@ -2809,7 +2937,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
}
}
-void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg,
+void MachineVerifier::verifyLiveRange(const LiveRange &LR, Register Reg,
LaneBitmask LaneMask) {
for (const VNInfo *VNI : LR.valnos)
verifyLiveRangeValue(LR, VNI, Reg, LaneMask);
@@ -2819,7 +2947,7 @@ void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg,
}
void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
- unsigned Reg = LI.reg;
+ Register Reg = LI.reg();
assert(Register::isVirtualRegister(Reg));
verifyLiveRange(LI, Reg);
@@ -2836,10 +2964,10 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
}
if (SR.empty()) {
report("Subrange must not be empty", MF);
- report_context(SR, LI.reg, SR.LaneMask);
+ report_context(SR, LI.reg(), SR.LaneMask);
}
Mask |= SR.LaneMask;
- verifyLiveRange(SR, LI.reg, SR.LaneMask);
+ verifyLiveRange(SR, LI.reg(), SR.LaneMask);
if (!LI.covers(SR)) {
report("A Subrange is not covered by the main range", MF);
report_context(LI);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
index d85b1b7988ce..095da09ea82b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -11,9 +11,7 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopUtils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/Debug.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MultiHazardRecognizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MultiHazardRecognizer.cpp
new file mode 100644
index 000000000000..e4cd92ac4868
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MultiHazardRecognizer.cpp
@@ -0,0 +1,92 @@
+//===- MultiHazardRecognizer.cpp - Scheduler Support ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MultiHazardRecognizer class, which is a wrapper
+// for a set of ScheduleHazardRecognizer instances.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MultiHazardRecognizer.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <functional>
+#include <numeric>
+
+using namespace llvm;
+
+void MultiHazardRecognizer::AddHazardRecognizer(
+ std::unique_ptr<ScheduleHazardRecognizer> &&R) {
+ MaxLookAhead = std::max(MaxLookAhead, R->getMaxLookAhead());
+ Recognizers.push_back(std::move(R));
+}
+
+bool MultiHazardRecognizer::atIssueLimit() const {
+ return llvm::any_of(Recognizers,
+ std::mem_fn(&ScheduleHazardRecognizer::atIssueLimit));
+}
+
+ScheduleHazardRecognizer::HazardType
+MultiHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+ for (auto &R : Recognizers) {
+ auto res = R->getHazardType(SU, Stalls);
+ if (res != NoHazard)
+ return res;
+ }
+ return NoHazard;
+}
+
+void MultiHazardRecognizer::Reset() {
+ for (auto &R : Recognizers)
+ R->Reset();
+}
+
+void MultiHazardRecognizer::EmitInstruction(SUnit *SU) {
+ for (auto &R : Recognizers)
+ R->EmitInstruction(SU);
+}
+
+void MultiHazardRecognizer::EmitInstruction(MachineInstr *MI) {
+ for (auto &R : Recognizers)
+ R->EmitInstruction(MI);
+}
+
+unsigned MultiHazardRecognizer::PreEmitNoops(SUnit *SU) {
+ auto MN = [=](unsigned a, std::unique_ptr<ScheduleHazardRecognizer> &R) {
+ return std::max(a, R->PreEmitNoops(SU));
+ };
+ return std::accumulate(Recognizers.begin(), Recognizers.end(), 0u, MN);
+}
+
+unsigned MultiHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
+ auto MN = [=](unsigned a, std::unique_ptr<ScheduleHazardRecognizer> &R) {
+ return std::max(a, R->PreEmitNoops(MI));
+ };
+ return std::accumulate(Recognizers.begin(), Recognizers.end(), 0u, MN);
+}
+
+bool MultiHazardRecognizer::ShouldPreferAnother(SUnit *SU) {
+ auto SPA = [=](std::unique_ptr<ScheduleHazardRecognizer> &R) {
+ return R->ShouldPreferAnother(SU);
+ };
+ return llvm::any_of(Recognizers, SPA);
+}
+
+void MultiHazardRecognizer::AdvanceCycle() {
+ for (auto &R : Recognizers)
+ R->AdvanceCycle();
+}
+
+void MultiHazardRecognizer::RecedeCycle() {
+ for (auto &R : Recognizers)
+ R->RecedeCycle();
+}
+
+void MultiHazardRecognizer::EmitNoop() {
+ for (auto &R : Recognizers)
+ R->EmitNoop();
+}
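[Editor's note] A minimal usage sketch, not part of this patch: a target's hazard-recognizer hook might compose several recognizers as below. MyTargetFUHazardRecognizer is a hypothetical placeholder, and the ScoreboardHazardRecognizer constructor signature is as assumed here.

    #include "llvm/CodeGen/MultiHazardRecognizer.h"
    #include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
    #include <memory>

    using namespace llvm;

    static std::unique_ptr<ScheduleHazardRecognizer>
    makeCombinedRecognizer(const InstrItineraryData *Itin,
                           const ScheduleDAG *DAG) {
      auto MHR = std::make_unique<MultiHazardRecognizer>();
      // The wrapper's MaxLookAhead becomes the max over all members.
      MHR->AddHazardRecognizer(
          std::make_unique<ScoreboardHazardRecognizer>(Itin, DAG, "combined"));
      MHR->AddHazardRecognizer(
          std::make_unique<MyTargetFUHazardRecognizer>()); // hypothetical
      return MHR;
    }

Each query (getHazardType, atIssueLimit, PreEmitNoops, ...) then fans out to every member recognizer, as the implementations above show.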
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp
index 311b87fa9e3b..8148b64d8443 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp
@@ -101,10 +101,10 @@ namespace {
// These functions are temporary abstractions around LiveVariables and
// LiveIntervals, so they can go away when LiveVariables does.
- bool isLiveIn(unsigned Reg, const MachineBasicBlock *MBB);
- bool isLiveOutPastPHIs(unsigned Reg, const MachineBasicBlock *MBB);
+ bool isLiveIn(Register Reg, const MachineBasicBlock *MBB);
+ bool isLiveOutPastPHIs(Register Reg, const MachineBasicBlock *MBB);
- using BBVRegPair = std::pair<unsigned, unsigned>;
+ using BBVRegPair = std::pair<unsigned, Register>;
using VRegPHIUse = DenseMap<BBVRegPair, unsigned>;
VRegPHIUse VRegPHIUseCount;
@@ -324,21 +324,43 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
// Increment use count of the newly created virtual register.
LV->setPHIJoin(IncomingReg);
- // When we are reusing the incoming register, it may already have been
- // killed in this block. The old kill will also have been inserted at
- // AfterPHIsIt, so it appears before the current PHICopy.
- if (reusedIncoming)
- if (MachineInstr *OldKill = VI.findKill(&MBB)) {
- LLVM_DEBUG(dbgs() << "Remove old kill from " << *OldKill);
- LV->removeVirtualRegisterKilled(IncomingReg, *OldKill);
- LLVM_DEBUG(MBB.dump());
+ MachineInstr *OldKill = nullptr;
+ bool IsPHICopyAfterOldKill = false;
+
+ if (reusedIncoming && (OldKill = VI.findKill(&MBB))) {
+    // Determine whether the PHICopy comes after the OldKill. By default
+    // the PHICopy is inserted as the first non-PHI instruction, so it
+    // precedes the OldKill; however, target hooks for
+    // createPHIDestinationCopy() may change that insertion point.
+ for (auto I = MBB.SkipPHIsAndLabels(MBB.begin()), E = MBB.end();
+ I != E; ++I) {
+ if (I == PHICopy)
+ break;
+
+ if (I == OldKill) {
+ IsPHICopyAfterOldKill = true;
+ break;
+ }
}
+ }
- // Add information to LiveVariables to know that the incoming value is
- // killed. Note that because the value is defined in several places (once
- // each for each incoming block), the "def" block and instruction fields
- // for the VarInfo is not filled in.
- LV->addVirtualRegisterKilled(IncomingReg, *PHICopy);
+    // When reusing the incoming register that OldKill already marks as
+    // killed, and the PHICopy comes after the OldKill, remove the kill
+    // flag from the OldKill.
+ if (IsPHICopyAfterOldKill) {
+ LLVM_DEBUG(dbgs() << "Remove old kill from " << *OldKill);
+ LV->removeVirtualRegisterKilled(IncomingReg, *OldKill);
+ LLVM_DEBUG(MBB.dump());
+ }
+
+    // Add information to LiveVariables to record that the incoming value
+    // is killed, either at its first use or, when the incoming register is
+    // reused, at the PHICopy that follows the OldKill. Note that because
+    // the value is defined in several places (once for each incoming
+    // block), the "def" block and instruction fields of the VarInfo are
+    // not filled in.
+ if (!OldKill || IsPHICopyAfterOldKill)
+ LV->addVirtualRegisterKilled(IncomingReg, *PHICopy);
}
// Since we are going to be deleting the PHI node, if it is the last use of
@@ -372,8 +394,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
}
LiveInterval &DestLI = LIS->getInterval(DestReg);
- assert(DestLI.begin() != DestLI.end() &&
- "PHIs should have nonempty LiveIntervals.");
+ assert(!DestLI.empty() && "PHIs should have nonempty LiveIntervals.");
if (DestLI.endIndex().isDead()) {
// A dead PHI's live range begins and ends at the start of the MBB, but
// the lowered copy, which will still be dead, needs to begin and end at
@@ -420,6 +441,19 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
if (!MBBsInsertedInto.insert(&opBlock).second)
continue; // If the copy has already been emitted, we're done.
+ MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg);
+ if (SrcRegDef && TII->isUnspillableTerminator(SrcRegDef)) {
+ assert(SrcRegDef->getOperand(0).isReg() &&
+ SrcRegDef->getOperand(0).isDef() &&
+ "Expected operand 0 to be a reg def!");
+ // Now that the PHI's use has been removed (as the instruction was
+ // removed) there should be no other uses of the SrcReg.
+ assert(MRI->use_empty(SrcReg) &&
+ "Expected a single use from UnspillableTerminator");
+ SrcRegDef->getOperand(0).setReg(IncomingReg);
+ continue;
+ }
+
// Find a safe location to insert the copy, this may be the first terminator
// in the block (or end()).
MachineBasicBlock::iterator InsertPos =
@@ -670,7 +704,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
return Changed;
}
-bool PHIElimination::isLiveIn(unsigned Reg, const MachineBasicBlock *MBB) {
+bool PHIElimination::isLiveIn(Register Reg, const MachineBasicBlock *MBB) {
assert((LV || LIS) &&
"isLiveIn() requires either LiveVariables or LiveIntervals");
if (LIS)
@@ -679,7 +713,7 @@ bool PHIElimination::isLiveIn(unsigned Reg, const MachineBasicBlock *MBB) {
return LV->isLiveIn(Reg, *MBB);
}
-bool PHIElimination::isLiveOutPastPHIs(unsigned Reg,
+bool PHIElimination::isLiveOutPastPHIs(Register Reg,
const MachineBasicBlock *MBB) {
assert((LV || LIS) &&
"isLiveOutPastPHIs() requires either LiveVariables or LiveIntervals");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PHIEliminationUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PHIEliminationUtils.cpp
index 2a72717e711d..016335f420d3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PHIEliminationUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PHIEliminationUtils.cpp
@@ -8,9 +8,9 @@
#include "PHIEliminationUtils.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+
using namespace llvm;
// findCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp
index c19ed1f8f71d..849b667254bd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp
@@ -28,6 +28,8 @@ static void codegen(Module *M, llvm::raw_pwrite_stream &OS,
function_ref<std::unique_ptr<TargetMachine>()> TMFactory,
CodeGenFileType FileType) {
std::unique_ptr<TargetMachine> TM = TMFactory();
+ assert(TM && "Failed to create target machine!");
+
legacy::PassManager CodeGenPasses;
if (TM->addPassesToEmitFile(CodeGenPasses, OS, nullptr, FileType))
report_fatal_error("Failed to setup codegen");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 4a66863ea803..34ac396c0471 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -178,6 +178,11 @@ namespace {
}
}
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties()
+ .set(MachineFunctionProperties::Property::IsSSA);
+ }
+
/// Track Def -> Use info used for rewriting copies.
using RewriteMapTy = SmallDenseMap<RegSubRegPair, ValueTrackerResult>;
@@ -196,41 +201,39 @@ namespace {
SmallPtrSetImpl<MachineInstr *> &LocalMIs);
bool optimizeRecurrence(MachineInstr &PHI);
bool findNextSource(RegSubRegPair RegSubReg, RewriteMapTy &RewriteMap);
- bool isMoveImmediate(MachineInstr &MI,
- SmallSet<unsigned, 4> &ImmDefRegs,
- DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
- bool foldImmediate(MachineInstr &MI, SmallSet<unsigned, 4> &ImmDefRegs,
- DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+ bool isMoveImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
+ DenseMap<Register, MachineInstr *> &ImmDefMIs);
+ bool foldImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
+ DenseMap<Register, MachineInstr *> &ImmDefMIs);
    /// Finds recurrence cycles, but only ones that are formulated around
/// a def operand and a use operand that are tied. If there is a use
/// operand commutable with the tied use operand, find recurrence cycle
/// along that operand as well.
- bool findTargetRecurrence(unsigned Reg,
- const SmallSet<unsigned, 2> &TargetReg,
+ bool findTargetRecurrence(Register Reg,
+ const SmallSet<Register, 2> &TargetReg,
RecurrenceCycle &RC);
/// If copy instruction \p MI is a virtual register copy, track it in
- /// the set \p CopySrcRegs and \p CopyMIs. If this virtual register was
- /// previously seen as a copy, replace the uses of this copy with the
- /// previously seen copy's destination register.
+ /// the set \p CopyMIs. If this virtual register was previously seen as a
+ /// copy, replace the uses of this copy with the previously seen copy's
+ /// destination register.
bool foldRedundantCopy(MachineInstr &MI,
- SmallSet<unsigned, 4> &CopySrcRegs,
- DenseMap<unsigned, MachineInstr *> &CopyMIs);
+ DenseMap<RegSubRegPair, MachineInstr *> &CopyMIs);
/// Is the register \p Reg a non-allocatable physical register?
- bool isNAPhysCopy(unsigned Reg);
+ bool isNAPhysCopy(Register Reg);
/// If copy instruction \p MI is a non-allocatable virtual<->physical
/// register copy, track it in the \p NAPhysToVirtMIs map. If this
/// non-allocatable physical register was previously copied to a virtual
/// registered and hasn't been clobbered, the virt->phys copy can be
/// deleted.
- bool foldRedundantNAPhysCopy(MachineInstr &MI,
- DenseMap<unsigned, MachineInstr *> &NAPhysToVirtMIs);
+ bool foldRedundantNAPhysCopy(
+ MachineInstr &MI, DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs);
bool isLoadFoldable(MachineInstr &MI,
- SmallSet<unsigned, 16> &FoldAsLoadDefCandidates);
+ SmallSet<Register, 16> &FoldAsLoadDefCandidates);
/// Check whether \p MI is understood by the register coalescer
/// but may require some rewriting.
@@ -291,7 +294,7 @@ namespace {
public:
ValueTrackerResult() = default;
- ValueTrackerResult(unsigned Reg, unsigned SubReg) {
+ ValueTrackerResult(Register Reg, unsigned SubReg) {
addSource(Reg, SubReg);
}
@@ -305,11 +308,11 @@ namespace {
Inst = nullptr;
}
- void addSource(unsigned SrcReg, unsigned SrcSubReg) {
+ void addSource(Register SrcReg, unsigned SrcSubReg) {
RegSrcs.push_back(RegSubRegPair(SrcReg, SrcSubReg));
}
- void setSource(int Idx, unsigned SrcReg, unsigned SrcSubReg) {
+ void setSource(int Idx, Register SrcReg, unsigned SrcSubReg) {
assert(Idx < getNumSources() && "Reg pair source out of index");
RegSrcs[Idx] = RegSubRegPair(SrcReg, SrcSubReg);
}
@@ -320,7 +323,7 @@ namespace {
return RegSrcs[Idx];
}
- unsigned getSrcReg(int Idx) const {
+ Register getSrcReg(int Idx) const {
assert(Idx < getNumSources() && "Reg source out of index");
return RegSrcs[Idx].Reg;
}
@@ -330,7 +333,7 @@ namespace {
return RegSrcs[Idx].SubReg;
}
- bool operator==(const ValueTrackerResult &Other) {
+ bool operator==(const ValueTrackerResult &Other) const {
if (Other.getInst() != getInst())
return false;
@@ -373,7 +376,7 @@ namespace {
unsigned DefSubReg;
/// The register where the value can be found.
- unsigned Reg;
+ Register Reg;
/// MachineRegisterInfo used to perform tracking.
const MachineRegisterInfo &MRI;
@@ -415,11 +418,11 @@ namespace {
/// Indeed, when \p Reg is a physical register that constructor does not
/// know which definition of \p Reg it should track.
/// Use the next constructor to track a physical register.
- ValueTracker(unsigned Reg, unsigned DefSubReg,
+ ValueTracker(Register Reg, unsigned DefSubReg,
const MachineRegisterInfo &MRI,
const TargetInstrInfo *TII = nullptr)
: DefSubReg(DefSubReg), Reg(Reg), MRI(MRI), TII(TII) {
- if (!Register::isPhysicalRegister(Reg)) {
+ if (!Reg.isPhysical()) {
Def = MRI.getVRegDef(Reg);
DefIdx = MRI.def_begin(Reg).getOperandNo();
}
@@ -824,7 +827,7 @@ public:
/// Rewrite the current source with \p NewReg and \p NewSubReg if possible.
/// \return True if the rewriting was possible, false otherwise.
- virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) = 0;
+ virtual bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) = 0;
};
/// Rewriter for COPY instructions.
@@ -852,7 +855,7 @@ public:
return true;
}
- bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
+ bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
if (CurrentSrcIdx != 1)
return false;
MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
@@ -897,7 +900,7 @@ public:
return true;
}
- bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
+ bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
return false;
}
};
@@ -941,7 +944,7 @@ public:
return true;
}
- bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
+ bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
if (CurrentSrcIdx != 2)
return false;
// We are rewriting the inserted reg.
@@ -988,7 +991,7 @@ public:
return true;
}
- bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
+ bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
// The only source we can rewrite is the input register.
if (CurrentSrcIdx != 1)
return false;
@@ -1066,7 +1069,7 @@ public:
return MODef.getSubReg() == 0;
}
- bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
+ bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override {
// We cannot rewrite out of bound operands.
// Moreover, rewritable sources are at odd positions.
if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands())
@@ -1312,7 +1315,7 @@ bool PeepholeOptimizer::optimizeUncoalescableCopy(
/// We only fold loads to virtual registers and the virtual register defined
/// has a single user.
bool PeepholeOptimizer::isLoadFoldable(
- MachineInstr &MI, SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) {
+ MachineInstr &MI, SmallSet<Register, 16> &FoldAsLoadDefCandidates) {
if (!MI.canFoldAsLoad() || !MI.mayLoad())
return false;
const MCInstrDesc &MCID = MI.getDesc();
@@ -1323,7 +1326,7 @@ bool PeepholeOptimizer::isLoadFoldable(
// To reduce compilation time, we check MRI->hasOneNonDBGUser when inserting
// loads. It should be checked when processing uses of the load, since
// uses can be removed during peephole.
- if (!MI.getOperand(0).getSubReg() && Register::isVirtualRegister(Reg) &&
+ if (Reg.isVirtual() && !MI.getOperand(0).getSubReg() &&
MRI->hasOneNonDBGUser(Reg)) {
FoldAsLoadDefCandidates.insert(Reg);
return true;
@@ -1332,15 +1335,15 @@ bool PeepholeOptimizer::isLoadFoldable(
}
bool PeepholeOptimizer::isMoveImmediate(
- MachineInstr &MI, SmallSet<unsigned, 4> &ImmDefRegs,
- DenseMap<unsigned, MachineInstr *> &ImmDefMIs) {
+ MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
+ DenseMap<Register, MachineInstr *> &ImmDefMIs) {
const MCInstrDesc &MCID = MI.getDesc();
if (!MI.isMoveImmediate())
return false;
if (MCID.getNumDefs() != 1)
return false;
Register Reg = MI.getOperand(0).getReg();
- if (Register::isVirtualRegister(Reg)) {
+ if (Reg.isVirtual()) {
ImmDefMIs.insert(std::make_pair(Reg, &MI));
ImmDefRegs.insert(Reg);
return true;
@@ -1352,22 +1355,19 @@ bool PeepholeOptimizer::isMoveImmediate(
/// Try folding register operands that are defined by move immediate
/// instructions, i.e. a trivial constant folding optimization, if
/// and only if the def and use are in the same BB.
-bool PeepholeOptimizer::foldImmediate(MachineInstr &MI,
- SmallSet<unsigned, 4> &ImmDefRegs,
- DenseMap<unsigned, MachineInstr *> &ImmDefMIs) {
+bool PeepholeOptimizer::foldImmediate(
+ MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
+ DenseMap<Register, MachineInstr *> &ImmDefMIs) {
for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || MO.isDef())
continue;
- // Ignore dead implicit defs.
- if (MO.isImplicit() && MO.isDead())
- continue;
Register Reg = MO.getReg();
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
continue;
if (ImmDefRegs.count(Reg) == 0)
continue;
- DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
+ DenseMap<Register, MachineInstr *>::iterator II = ImmDefMIs.find(Reg);
assert(II != ImmDefMIs.end() && "couldn't find immediate definition");
if (TII->FoldImmediate(MI, *II->second, Reg, MRI)) {
++NumImmFold;
@@ -1391,33 +1391,30 @@ bool PeepholeOptimizer::foldImmediate(MachineInstr &MI,
// %2 = COPY %0:sub1
//
// Should replace %2 uses with %1:sub1
-bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI,
- SmallSet<unsigned, 4> &CopySrcRegs,
- DenseMap<unsigned, MachineInstr *> &CopyMIs) {
+bool PeepholeOptimizer::foldRedundantCopy(
+ MachineInstr &MI, DenseMap<RegSubRegPair, MachineInstr *> &CopyMIs) {
assert(MI.isCopy() && "expected a COPY machine instruction");
Register SrcReg = MI.getOperand(1).getReg();
- if (!Register::isVirtualRegister(SrcReg))
+ unsigned SrcSubReg = MI.getOperand(1).getSubReg();
+ if (!SrcReg.isVirtual())
return false;
Register DstReg = MI.getOperand(0).getReg();
- if (!Register::isVirtualRegister(DstReg))
+ if (!DstReg.isVirtual())
return false;
- if (CopySrcRegs.insert(SrcReg).second) {
+ RegSubRegPair SrcPair(SrcReg, SrcSubReg);
+
+ if (CopyMIs.insert(std::make_pair(SrcPair, &MI)).second) {
// First copy of this reg seen.
- CopyMIs.insert(std::make_pair(SrcReg, &MI));
return false;
}
- MachineInstr *PrevCopy = CopyMIs.find(SrcReg)->second;
+ MachineInstr *PrevCopy = CopyMIs.find(SrcPair)->second;
- unsigned SrcSubReg = MI.getOperand(1).getSubReg();
- unsigned PrevSrcSubReg = PrevCopy->getOperand(1).getSubReg();
-
- // Can't replace different subregister extracts.
- if (SrcSubReg != PrevSrcSubReg)
- return false;
+ assert(SrcSubReg == PrevCopy->getOperand(1).getSubReg() &&
+ "Unexpected mismatching subreg!");
Register PrevDstReg = PrevCopy->getOperand(0).getReg();
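[Editor's note] The rewrite above keys the seen-copies map by (register, subregister) rather than by register alone. A sketch of the record-or-fold idiom, assuming the DenseMapInfo<RegSubRegPair> specialization this change appears to rely on is available (RegSubRegPair comes from TargetInstrInfo.h):

    // Record the first copy from (SrcReg, SrcSubReg); on a repeat,
    // return the earlier copy. insert() both records and tests.
    static MachineInstr *
    lookupOrRecord(DenseMap<RegSubRegPair, MachineInstr *> &CopyMIs,
                   Register SrcReg, unsigned SrcSubReg, MachineInstr &MI) {
      RegSubRegPair SrcPair(SrcReg, SrcSubReg);
      auto Ins = CopyMIs.insert(std::make_pair(SrcPair, &MI));
      if (Ins.second)
        return nullptr; // first copy from this (reg, subreg) source
      return Ins.first->second; // earlier copy with the same source
    }

Reusing the iterator returned by insert() would also spare the second find() the patch performs; either way, equal keys guarantee the matching subregister the new assert checks.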
@@ -1435,12 +1432,12 @@ bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI,
return true;
}
-bool PeepholeOptimizer::isNAPhysCopy(unsigned Reg) {
- return Register::isPhysicalRegister(Reg) && !MRI->isAllocatable(Reg);
+bool PeepholeOptimizer::isNAPhysCopy(Register Reg) {
+ return Reg.isPhysical() && !MRI->isAllocatable(Reg);
}
bool PeepholeOptimizer::foldRedundantNAPhysCopy(
- MachineInstr &MI, DenseMap<unsigned, MachineInstr *> &NAPhysToVirtMIs) {
+ MachineInstr &MI, DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs) {
assert(MI.isCopy() && "expected a COPY machine instruction");
if (DisableNAPhysCopyOpt)
@@ -1449,17 +1446,17 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy(
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
if (isNAPhysCopy(SrcReg) && Register::isVirtualRegister(DstReg)) {
- // %vreg = COPY %physreg
+ // %vreg = COPY $physreg
// Avoid using a datastructure which can track multiple live non-allocatable
// phys->virt copies since LLVM doesn't seem to do this.
NAPhysToVirtMIs.insert({SrcReg, &MI});
return false;
}
- if (!(Register::isVirtualRegister(SrcReg) && isNAPhysCopy(DstReg)))
+ if (!(SrcReg.isVirtual() && isNAPhysCopy(DstReg)))
return false;
- // %physreg = COPY %vreg
+ // $physreg = COPY %vreg
auto PrevCopy = NAPhysToVirtMIs.find(DstReg);
if (PrevCopy == NAPhysToVirtMIs.end()) {
// We can't remove the copy: there was an intervening clobber of the
@@ -1489,13 +1486,11 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy(
 /// \brief Returns true if \p MO is a virtual register operand.
static bool isVirtualRegisterOperand(MachineOperand &MO) {
- if (!MO.isReg())
- return false;
- return Register::isVirtualRegister(MO.getReg());
+ return MO.isReg() && MO.getReg().isVirtual();
}
bool PeepholeOptimizer::findTargetRecurrence(
- unsigned Reg, const SmallSet<unsigned, 2> &TargetRegs,
+ Register Reg, const SmallSet<Register, 2> &TargetRegs,
RecurrenceCycle &RC) {
// Recurrence found if Reg is in TargetRegs.
if (TargetRegs.count(Reg))
@@ -1566,7 +1561,7 @@ bool PeepholeOptimizer::findTargetRecurrence(
/// %1 of ADD instruction, the redundant move instruction can be
/// avoided.
bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) {
- SmallSet<unsigned, 2> TargetRegs;
+ SmallSet<Register, 2> TargetRegs;
for (unsigned Idx = 1; Idx < PHI.getNumOperands(); Idx += 2) {
MachineOperand &MO = PHI.getOperand(Idx);
assert(isVirtualRegisterOperand(MO) && "Invalid PHI instruction");
@@ -1622,20 +1617,20 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
// during the scan, if a MI is not in the set, it is assumed to be located
// after. Newly created MIs have to be inserted in the set as well.
SmallPtrSet<MachineInstr*, 16> LocalMIs;
- SmallSet<unsigned, 4> ImmDefRegs;
- DenseMap<unsigned, MachineInstr*> ImmDefMIs;
- SmallSet<unsigned, 16> FoldAsLoadDefCandidates;
+ SmallSet<Register, 4> ImmDefRegs;
+ DenseMap<Register, MachineInstr *> ImmDefMIs;
+ SmallSet<Register, 16> FoldAsLoadDefCandidates;
// Track when a non-allocatable physical register is copied to a virtual
// register so that useless moves can be removed.
//
- // %physreg is the map index; MI is the last valid `%vreg = COPY %physreg`
- // without any intervening re-definition of %physreg.
- DenseMap<unsigned, MachineInstr *> NAPhysToVirtMIs;
+ // $physreg is the map index; MI is the last valid `%vreg = COPY $physreg`
+ // without any intervening re-definition of $physreg.
+ DenseMap<Register, MachineInstr *> NAPhysToVirtMIs;
- // Set of virtual registers that are copied from.
- SmallSet<unsigned, 4> CopySrcRegs;
- DenseMap<unsigned, MachineInstr *> CopySrcMIs;
+ // Set of pairs of virtual registers and their subregs that are copied
+ // from.
+ DenseMap<RegSubRegPair, MachineInstr *> CopySrcMIs;
bool IsLoopHeader = MLI->isLoopHeader(&MBB);
@@ -1646,9 +1641,10 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
++MII;
LocalMIs.insert(MI);
- // Skip debug instructions. They should not affect this peephole optimization.
+ // Skip debug instructions. They should not affect this peephole
+ // optimization.
if (MI->isDebugInstr())
- continue;
+ continue;
if (MI->isPosition())
continue;
@@ -1678,7 +1674,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
} else if (MO.isRegMask()) {
const uint32_t *RegMask = MO.getRegMask();
for (auto &RegMI : NAPhysToVirtMIs) {
- unsigned Def = RegMI.first;
+ Register Def = RegMI.first;
if (MachineOperand::clobbersPhysReg(RegMask, Def)) {
LLVM_DEBUG(dbgs()
<< "NAPhysCopy: invalidating because of " << *MI);
@@ -1723,9 +1719,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- if (MI->isCopy() &&
- (foldRedundantCopy(*MI, CopySrcRegs, CopySrcMIs) ||
- foldRedundantNAPhysCopy(*MI, NAPhysToVirtMIs))) {
+ if (MI->isCopy() && (foldRedundantCopy(*MI, CopySrcMIs) ||
+ foldRedundantNAPhysCopy(*MI, NAPhysToVirtMIs))) {
LocalMIs.erase(MI);
LLVM_DEBUG(dbgs() << "Deleting redundant copy: " << *MI << "\n");
MI->eraseFromParent();
@@ -1763,13 +1758,13 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
const MachineOperand &MOp = MI->getOperand(i);
if (!MOp.isReg())
continue;
- unsigned FoldAsLoadDefReg = MOp.getReg();
+ Register FoldAsLoadDefReg = MOp.getReg();
if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) {
// We need to fold load after optimizeCmpInstr, since
// optimizeCmpInstr can enable folding by converting SUB to CMP.
// Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and
// we need it for markUsesInDebugValueAsUndef().
- unsigned FoldedReg = FoldAsLoadDefReg;
+ Register FoldedReg = FoldAsLoadDefReg;
MachineInstr *DefMI = nullptr;
if (MachineInstr *FoldMI =
TII->optimizeLoadInstr(*MI, MRI, FoldAsLoadDefReg, DefMI)) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
index 4f88f4d3dd6a..82ed386db827 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
@@ -82,11 +82,9 @@ bool PostRAHazardRecognizer::runOnMachineFunction(MachineFunction &Fn) {
for (MachineInstr &MI : MBB) {
// If we need to emit noops prior to this instruction, then do so.
unsigned NumPreNoops = HazardRec->PreEmitNoops(&MI);
- for (unsigned i = 0; i != NumPreNoops; ++i) {
- HazardRec->EmitNoop();
- TII->insertNoop(MBB, MachineBasicBlock::iterator(MI));
- ++NumNoops;
- }
+ HazardRec->EmitNoops(NumPreNoops);
+ TII->insertNoops(MBB, MachineBasicBlock::iterator(MI), NumPreNoops);
+ NumNoops += NumPreNoops;
HazardRec->EmitInstruction(&MI);
if (HazardRec->atIssueLimit()) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 1be9544848ec..80c38f3ec341 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -96,7 +96,7 @@ static bool lowerObjCCall(Function &F, const char *NewFn,
++I;
IRBuilder<> Builder(CI->getParent(), CI->getIterator());
- SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
+ SmallVector<Value *, 8> Args(CI->args());
CallInst *NewCI = Builder.CreateCall(FCache, Args);
NewCI->setName(CI->getName());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index a489f493d5ee..378aaba2a65f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -620,12 +620,12 @@ void PEI::spillCalleeSavedRegs(MachineFunction &MF) {
if (!MFI.hasCalls())
NumLeafFuncWithSpills++;
- for (MachineBasicBlock *SaveBlock : SaveBlocks) {
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
insertCSRSaves(*SaveBlock, CSI);
- // Update the live-in information of all the blocks up to the save
- // point.
- updateLiveness(MF);
- }
+
+ // Update the live-in information of all the blocks up to the save point.
+ updateLiveness(MF);
+
for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
insertCSRRestores(*RestoreBlock, CSI);
}
@@ -1077,7 +1077,26 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
// If the frame pointer is eliminated, all frame offsets will be relative to
// SP not FP. Align to MaxAlign so this works.
StackAlign = std::max(StackAlign, MaxAlign);
+ int64_t OffsetBeforeAlignment = Offset;
Offset = alignTo(Offset, StackAlign, Skew);
+
+  // If we have increased the offset to fulfill the alignment constraints,
+  // the scavenging spill slots may become harder to reach from the stack
+  // pointer; float them so they stay close to it.
+ if (OffsetBeforeAlignment != Offset && RS && !EarlyScavengingSlots) {
+ SmallVector<int, 2> SFIs;
+ RS->getScavengingFrameIndices(SFIs);
+ LLVM_DEBUG(if (!SFIs.empty()) llvm::dbgs()
+ << "Adjusting emergency spill slots!\n";);
+ int64_t Delta = Offset - OffsetBeforeAlignment;
+ for (SmallVectorImpl<int>::iterator I = SFIs.begin(), IE = SFIs.end();
+ I != IE; ++I) {
+ LLVM_DEBUG(llvm::dbgs() << "Adjusting offset of emergency spill slot #"
+ << *I << " from " << MFI.getObjectOffset(*I););
+ MFI.setObjectOffset(*I, MFI.getObjectOffset(*I) - Delta);
+ LLVM_DEBUG(llvm::dbgs() << " to " << MFI.getObjectOffset(*I) << "\n";);
+ }
+ }
}
// Update frame info to pretend that this is part of the stack...
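[Editor's note] A worked example of the adjustment, with illustrative numbers only (the real values come from MachineFrameInfo, and the skew is taken as zero here):

    // Suppose the frame used 40 bytes before the final alignment and
    // StackAlign is 16:
    //   Offset = alignTo(40, 16) = 48  =>  Delta = 48 - 40 = 8.
    // Emergency spill slots carry SP-relative (negative) offsets, so a
    // slot at -8 is moved to -8 - 8 = -16: the same distance from the
    // realigned stack pointer instead of Delta bytes further away.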
@@ -1209,7 +1228,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
unsigned FrameIdx = MI.getOperand(0).getIndex();
unsigned Size = MF.getFrameInfo().getObjectSize(FrameIdx);
- int64_t Offset =
+ StackOffset Offset =
TFI->getFrameIndexReference(MF, FrameIdx, Reg);
MI.getOperand(0).ChangeToRegister(Reg, false /*isDef*/);
MI.getOperand(0).setIsDebug();
@@ -1236,7 +1255,8 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
// Make the DBG_VALUE direct.
MI.getDebugOffset().ChangeToRegister(0, false);
}
- DIExpr = DIExpression::prepend(DIExpr, PrependFlags, Offset);
+
+ DIExpr = TRI.prependOffsetExpression(DIExpr, PrependFlags, Offset);
MI.getDebugExpressionOp().setMetadata(DIExpr);
continue;
}
@@ -1252,9 +1272,11 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
"DBG_VALUE machine instruction");
Register Reg;
MachineOperand &Offset = MI.getOperand(i + 1);
- int refOffset = TFI->getFrameIndexReferencePreferSP(
+ StackOffset refOffset = TFI->getFrameIndexReferencePreferSP(
MF, MI.getOperand(i).getIndex(), Reg, /*IgnoreSPUpdates*/ false);
- Offset.setImm(Offset.getImm() + refOffset + SPAdj);
+ assert(!refOffset.getScalable() &&
+ "Frame offsets with a scalable component are not supported");
+ Offset.setImm(Offset.getImm() + refOffset.getFixed() + SPAdj);
MI.getOperand(i).ChangeToRegister(Reg, false /*isDef*/);
continue;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PseudoProbeInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PseudoProbeInserter.cpp
new file mode 100644
index 000000000000..9c716a5a37ea
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PseudoProbeInserter.cpp
@@ -0,0 +1,95 @@
+//===- PseudoProbeInserter.cpp - Insert annotation for callsite profiling -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PseudoProbeInserter pass, which inserts pseudo
+// probe annotations for call instructions carrying a pseudo-probe-specific
+// dwarf discriminator. Such a discriminator indicates that the call comes
+// with a pseudo probe, and its value holds the information needed to
+// identify the corresponding counter.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/PseudoProbe.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Target/TargetMachine.h"
+#include <unordered_map>
+
+#define DEBUG_TYPE "pseudo-probe-inserter"
+
+using namespace llvm;
+
+namespace {
+class PseudoProbeInserter : public MachineFunctionPass {
+public:
+ static char ID;
+
+ PseudoProbeInserter() : MachineFunctionPass(ID) {
+ initializePseudoProbeInserterPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override { return "Pseudo Probe Inserter"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ bool Changed = false;
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (MI.isCall()) {
+ if (DILocation *DL = MI.getDebugLoc()) {
+ auto Value = DL->getDiscriminator();
+ if (DILocation::isPseudoProbeDiscriminator(Value)) {
+ BuildMI(MBB, MI, DL, TII->get(TargetOpcode::PSEUDO_PROBE))
+ .addImm(getFuncGUID(MF.getFunction().getParent(), DL))
+ .addImm(
+ PseudoProbeDwarfDiscriminator::extractProbeIndex(Value))
+ .addImm(
+ PseudoProbeDwarfDiscriminator::extractProbeType(Value))
+ .addImm(PseudoProbeDwarfDiscriminator::extractProbeAttributes(
+ Value));
+ Changed = true;
+ }
+ }
+ }
+ }
+ }
+
+ return Changed;
+ }
+
+private:
+ uint64_t getFuncGUID(Module *M, DILocation *DL) {
+ auto *SP = DL->getScope()->getSubprogram();
+ auto Name = SP->getLinkageName();
+ if (Name.empty())
+ Name = SP->getName();
+ return Function::getGUID(Name);
+ }
+};
+} // namespace
+
+char PseudoProbeInserter::ID = 0;
+INITIALIZE_PASS_BEGIN(PseudoProbeInserter, DEBUG_TYPE,
+ "Insert pseudo probe annotations for value profiling",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(PseudoProbeInserter, DEBUG_TYPE,
+ "Insert pseudo probe annotations for value profiling",
+ false, false)
+
+FunctionPass *llvm::createPseudoProbeInserter() {
+ return new PseudoProbeInserter();
+}
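[Editor's note] For context, a sketch of the decoding step the pass performs on each call's discriminator. The extract helpers are the ones used above from llvm/IR/PseudoProbe.h; the exact bit layout is assumed to live there and is not restated:

    #include "llvm/IR/DebugInfoMetadata.h"
    #include "llvm/IR/PseudoProbe.h"
    #include <cstdint>

    using namespace llvm;

    // Split a pseudo-probe dwarf discriminator into the three fields the
    // pass feeds to the PSEUDO_PROBE instruction (after the function GUID).
    static void decodeProbe(uint32_t Discr) {
      if (!DILocation::isPseudoProbeDiscriminator(Discr))
        return; // an ordinary dwarf discriminator, not a probe
      uint32_t Index = PseudoProbeDwarfDiscriminator::extractProbeIndex(Discr);
      uint32_t Type = PseudoProbeDwarfDiscriminator::extractProbeType(Discr);
      uint32_t Attr =
          PseudoProbeDwarfDiscriminator::extractProbeAttributes(Discr);
      (void)Index; (void)Type; (void)Attr;
    }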
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp
index 437a6b030096..cebb902f0a4a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp
@@ -984,11 +984,6 @@ RegisterRef DataFlowGraph::restrictRef(RegisterRef AR, RegisterRef BR) const {
LaneBitmask M = AR.Mask & BR.Mask;
return M.any() ? RegisterRef(AR.Reg, M) : RegisterRef();
}
-#ifndef NDEBUG
-// RegisterRef NAR = PRI.normalize(AR);
-// RegisterRef NBR = PRI.normalize(BR);
-// assert(NAR.Reg != NBR.Reg);
-#endif
// This isn't strictly correct, because the overlap may happen in the
// part masked out.
if (PRI.alias(AR, BR))
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp
index 0bcd27f8ea45..76bf0c280970 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp
@@ -23,8 +23,10 @@
// <10.1145/2086696.2086706>. <hal-00647369>
//
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominanceFrontier.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -45,6 +47,7 @@
#include <cstdint>
#include <iterator>
#include <map>
+#include <unordered_map>
#include <utility>
#include <vector>
@@ -108,7 +111,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
const RegisterAggr &DefRRs) {
NodeList RDefs; // Return value.
SetVector<NodeId> DefQ;
- SetVector<NodeId> Owners;
+ DenseMap<MachineInstr*, uint32_t> OrdMap;
// Dead defs will be treated as if they were live, since they are actually
// on the data-flow path. They cannot be ignored because even though they
@@ -151,18 +154,9 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
for (auto S : DFG.getRelatedRefs(TA.Addr->getOwner(DFG), TA))
if (NodeId RD = NodeAddr<RefNode*>(S).Addr->getReachingDef())
DefQ.insert(RD);
- }
-
- // Remove all non-phi defs that are not aliased to RefRR, and collect
- // the owners of the remaining defs.
- SetVector<NodeId> Defs;
- for (NodeId N : DefQ) {
- auto TA = DFG.addr<DefNode*>(N);
- bool IsPhi = TA.Addr->getFlags() & NodeAttrs::PhiRef;
- if (!IsPhi && !PRI.alias(RefRR, TA.Addr->getRegRef(DFG)))
- continue;
- Defs.insert(TA.Id);
- Owners.insert(TA.Addr->getOwner(DFG).Id);
+ // Don't visit sibling defs. They share the same reaching def (which
+ // will be visited anyway), but they define something not aliased to
+ // this ref.
}
// Return the MachineBasicBlock containing a given instruction.
@@ -174,38 +168,80 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
NodeAddr<BlockNode*> BA = PA.Addr->getOwner(DFG);
return BA.Addr->getCode();
};
- // Less(A,B) iff instruction A is further down in the dominator tree than B.
- auto Less = [&Block,this] (NodeId A, NodeId B) -> bool {
+
+ SmallSet<NodeId,32> Defs;
+
+  // Remove all non-phi defs that are not aliased to RefRR, and segregate
+  // the remaining defs into buckets by containing block.
+ std::map<NodeId, NodeAddr<InstrNode*>> Owners;
+ std::map<MachineBasicBlock*, SmallVector<NodeId,32>> Blocks;
+ for (NodeId N : DefQ) {
+ auto TA = DFG.addr<DefNode*>(N);
+ bool IsPhi = TA.Addr->getFlags() & NodeAttrs::PhiRef;
+ if (!IsPhi && !PRI.alias(RefRR, TA.Addr->getRegRef(DFG)))
+ continue;
+ Defs.insert(TA.Id);
+ NodeAddr<InstrNode*> IA = TA.Addr->getOwner(DFG);
+ Owners[TA.Id] = IA;
+ Blocks[Block(IA)].push_back(IA.Id);
+ }
+
+ auto Precedes = [this,&OrdMap] (NodeId A, NodeId B) {
if (A == B)
return false;
- auto OA = DFG.addr<InstrNode*>(A), OB = DFG.addr<InstrNode*>(B);
- MachineBasicBlock *BA = Block(OA), *BB = Block(OB);
- if (BA != BB)
- return MDT.dominates(BB, BA);
- // They are in the same block.
+ NodeAddr<InstrNode*> OA = DFG.addr<InstrNode*>(A);
+ NodeAddr<InstrNode*> OB = DFG.addr<InstrNode*>(B);
bool StmtA = OA.Addr->getKind() == NodeAttrs::Stmt;
bool StmtB = OB.Addr->getKind() == NodeAttrs::Stmt;
- if (StmtA) {
- if (!StmtB) // OB is a phi and phis dominate statements.
- return true;
- MachineInstr *CA = NodeAddr<StmtNode*>(OA).Addr->getCode();
- MachineInstr *CB = NodeAddr<StmtNode*>(OB).Addr->getCode();
- // The order must be linear, so tie-break such equalities.
- if (CA == CB)
- return A < B;
- return MDT.dominates(CB, CA);
- } else {
- // OA is a phi.
- if (StmtB)
- return false;
- // Both are phis. There is no ordering between phis (in terms of
- // the data-flow), so tie-break this via node id comparison.
+ if (StmtA && StmtB) {
+ const MachineInstr *InA = NodeAddr<StmtNode*>(OA).Addr->getCode();
+ const MachineInstr *InB = NodeAddr<StmtNode*>(OB).Addr->getCode();
+ assert(InA->getParent() == InB->getParent());
+ auto FA = OrdMap.find(InA);
+ if (FA != OrdMap.end())
+ return FA->second < OrdMap.find(InB)->second;
+ const MachineBasicBlock *BB = InA->getParent();
+ for (auto It = BB->begin(), E = BB->end(); It != E; ++It) {
+ if (It == InA->getIterator())
+ return true;
+ if (It == InB->getIterator())
+ return false;
+ }
+ llvm_unreachable("InA and InB should be in the same block");
+ }
+ // One of them is a phi node.
+ if (!StmtA && !StmtB) {
+ // Both are phis, which are unordered. Break the tie by id numbers.
return A < B;
}
+ // Only one of them is a phi. Phis always precede statements.
+ return !StmtA;
};
- std::vector<NodeId> Tmp(Owners.begin(), Owners.end());
- llvm::sort(Tmp, Less);
+ auto GetOrder = [&OrdMap] (MachineBasicBlock &B) {
+ uint32_t Pos = 0;
+ for (MachineInstr &In : B)
+ OrdMap.insert({&In, ++Pos});
+ };
+
+ // For each block, sort the nodes in it.
+ std::vector<MachineBasicBlock*> TmpBB;
+ for (auto &Bucket : Blocks) {
+ TmpBB.push_back(Bucket.first);
+ if (Bucket.second.size() > 2)
+ GetOrder(*Bucket.first);
+ llvm::sort(Bucket.second, Precedes);
+ }
+
+ // Sort the blocks with respect to dominance.
+ llvm::sort(TmpBB,
+ [this](auto A, auto B) { return MDT.properlyDominates(A, B); });
+
+ std::vector<NodeId> TmpInst;
+ for (auto I = TmpBB.rbegin(), E = TmpBB.rend(); I != E; ++I) {
+ auto &Bucket = Blocks[*I];
+ TmpInst.insert(TmpInst.end(), Bucket.rbegin(), Bucket.rend());
+ }
// The vector is a list of instructions, so that defs coming from
// the same instruction don't need to be artificially ordered.
@@ -220,6 +256,9 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
// *d3<C> If A \incl BuC, and B \incl AuC, then *d2 would be
// covered if we added A first, and A would be covered
// if we added B first.
+ // In this example we want both A and B, because we don't want to give
+ // either one priority over the other, since they belong to the same
+ // statement.
RegisterAggr RRs(DefRRs);
@@ -227,7 +266,8 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
return TA.Addr->getKind() == NodeAttrs::Def &&
Defs.count(TA.Id);
};
- for (NodeId T : Tmp) {
+
+ for (NodeId T : TmpInst) {
if (!FullChain && RRs.hasCoverOf(RefRR))
break;
auto TA = DFG.addr<InstrNode*>(T);
@@ -246,7 +286,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
if (FullChain || IsPhi || !RRs.hasCoverOf(QR))
Ds.push_back(DA);
}
- RDefs.insert(RDefs.end(), Ds.begin(), Ds.end());
+ llvm::append_range(RDefs, Ds);
for (NodeAddr<DefNode*> DA : Ds) {
// When collecting a full chain of definitions, do not consider phi
// defs to actually define a register.
@@ -260,7 +300,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
auto DeadP = [](const NodeAddr<DefNode*> DA) -> bool {
return DA.Addr->getFlags() & NodeAttrs::Dead;
};
- RDefs.resize(std::distance(RDefs.begin(), llvm::remove_if(RDefs, DeadP)));
+ llvm::erase_if(RDefs, DeadP);
return RDefs;
}
@@ -430,13 +470,13 @@ void Liveness::computePhiInfo() {
NodeList Blocks = FA.Addr->members(DFG);
for (NodeAddr<BlockNode*> BA : Blocks) {
auto Ps = BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG);
- Phis.insert(Phis.end(), Ps.begin(), Ps.end());
+ llvm::append_range(Phis, Ps);
}
// phi use -> (map: reaching phi -> set of registers defined in between)
std::map<NodeId,std::map<NodeId,RegisterAggr>> PhiUp;
std::vector<NodeId> PhiUQ; // Work list of phis for upward propagation.
- std::map<NodeId,RegisterAggr> PhiDRs; // Phi -> registers defined by it.
+ std::unordered_map<NodeId,RegisterAggr> PhiDRs; // Phi -> registers defined by it.
// Go over all phis.
for (NodeAddr<PhiNode*> PhiA : Phis) {
@@ -474,7 +514,7 @@ void Liveness::computePhiInfo() {
NodeAddr<UseNode*> A = DFG.addr<UseNode*>(UN);
uint16_t F = A.Addr->getFlags();
if ((F & (NodeAttrs::Undef | NodeAttrs::PhiRef)) == 0) {
- RegisterRef R = PRI.normalize(A.Addr->getRegRef(DFG));
+ RegisterRef R = A.Addr->getRegRef(DFG);
RealUses[R.Reg].insert({A.Id,R.Mask});
}
UN = A.Addr->getSibling();
@@ -612,6 +652,23 @@ void Liveness::computePhiInfo() {
// is covered, or until reaching the final phi. Only assume that the
// reference reaches the phi in the latter case.
+ // The operation "clearIn" can be expensive. For a given set of intervening
+ // defs, cache the result of subtracting these defs from a given register
+ // ref.
+ using SubMap = std::unordered_map<RegisterRef, RegisterRef>;
+ std::unordered_map<RegisterAggr, SubMap> Subs;
+ auto ClearIn = [] (RegisterRef RR, const RegisterAggr &Mid, SubMap &SM) {
+ if (Mid.empty())
+ return RR;
+ auto F = SM.find(RR);
+ if (F != SM.end())
+ return F->second;
+ RegisterRef S = Mid.clearIn(RR);
+ SM.insert({RR, S});
+ return S;
+ };
+
+ // Go over all phis.
for (unsigned i = 0; i < PhiUQ.size(); ++i) {
auto PA = DFG.addr<PhiNode*>(PhiUQ[i]);
NodeList PUs = PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG);
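[Editor's note] A note on the memoization added above: keying std::unordered_map by RegisterRef and RegisterAggr presupposes std::hash specializations for both, assumed here to be provided alongside this change in the RDF headers. The lambda as a free function, to make the cache contract explicit:

    // One SubMap per distinct set of intervening defs, so a repeated
    // subtraction of the same def set becomes a hash lookup.
    using SubMap = std::unordered_map<RegisterRef, RegisterRef>;

    static RegisterRef cachedClearIn(RegisterRef RR, const RegisterAggr &Mid,
                                     SubMap &SM) {
      if (Mid.empty())
        return RR;        // nothing to subtract
      auto F = SM.find(RR);
      if (F != SM.end())
        return F->second; // cache hit
      RegisterRef S = Mid.clearIn(RR);
      SM.insert({RR, S});
      return S;
    }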
@@ -619,17 +676,17 @@ void Liveness::computePhiInfo() {
for (NodeAddr<UseNode*> UA : PUs) {
std::map<NodeId,RegisterAggr> &PUM = PhiUp[UA.Id];
- RegisterRef UR = PRI.normalize(UA.Addr->getRegRef(DFG));
+ RegisterRef UR = UA.Addr->getRegRef(DFG);
for (const std::pair<const NodeId, RegisterAggr> &P : PUM) {
bool Changed = false;
const RegisterAggr &MidDefs = P.second;
-
// Collect the set PropUp of uses that are reached by the current
// phi PA, and are not covered by any intervening def between the
// currently visited use UA and the upward phi P.
if (MidDefs.hasCoverOf(UR))
continue;
+ SubMap &SM = Subs[MidDefs];
// General algorithm:
// for each (R,U) : U is use node of R, U is reached by PA
@@ -649,7 +706,7 @@ void Liveness::computePhiInfo() {
LaneBitmask M = R.Mask & V.second;
if (M.none())
continue;
- if (RegisterRef SS = MidDefs.clearIn(RegisterRef(R.Reg, M))) {
+ if (RegisterRef SS = ClearIn(RegisterRef(R.Reg, M), MidDefs, SM)) {
NodeRefSet &RS = RealUseMap[P.first][SS.Reg];
Changed |= RS.insert({V.first,SS.Mask}).second;
}
@@ -1073,7 +1130,7 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
for (NodeAddr<UseNode*> UA : IA.Addr->members_if(DFG.IsUse, DFG)) {
if (UA.Addr->getFlags() & NodeAttrs::Undef)
continue;
- RegisterRef RR = PRI.normalize(UA.Addr->getRegRef(DFG));
+ RegisterRef RR = UA.Addr->getRegRef(DFG);
for (NodeAddr<DefNode*> D : getAllReachingDefs(UA))
if (getBlockWithRef(D.Id) != B)
LiveIn[RR.Reg].insert({D.Id,RR.Mask});
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RDFRegisters.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RDFRegisters.cpp
index bd8661816e71..8760ba118934 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RDFRegisters.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RDFRegisters.cpp
@@ -84,18 +84,23 @@ PhysicalRegisterInfo::PhysicalRegisterInfo(const TargetRegisterInfo &tri,
for (uint32_t M = 1, NM = RegMasks.size(); M <= NM; ++M) {
BitVector PU(TRI.getNumRegUnits());
const uint32_t *MB = RegMasks.get(M);
- for (unsigned i = 1, e = TRI.getNumRegs(); i != e; ++i) {
- if (!(MB[i/32] & (1u << (i%32))))
+ for (unsigned I = 1, E = TRI.getNumRegs(); I != E; ++I) {
+ if (!(MB[I / 32] & (1u << (I % 32))))
continue;
- for (MCRegUnitIterator U(i, &TRI); U.isValid(); ++U)
+ for (MCRegUnitIterator U(MCRegister::from(I), &TRI); U.isValid(); ++U)
PU.set(*U);
}
MaskInfos[M].Units = PU.flip();
}
-}
-RegisterRef PhysicalRegisterInfo::normalize(RegisterRef RR) const {
- return RR;
+ AliasInfos.resize(TRI.getNumRegUnits());
+ for (uint32_t U = 0, NU = TRI.getNumRegUnits(); U != NU; ++U) {
+ BitVector AS(TRI.getNumRegs());
+ for (MCRegUnitRootIterator R(U, &TRI); R.isValid(); ++R)
+ for (MCSuperRegIterator S(*R, &TRI, true); S.isValid(); ++S)
+ AS.set(*S);
+ AliasInfos[U].Regs = AS;
+ }
}
std::set<RegisterId> PhysicalRegisterInfo::getAliasSet(RegisterId Reg) const {
@@ -321,26 +326,17 @@ RegisterRef RegisterAggr::makeRegRef() const {
if (U < 0)
return RegisterRef();
- auto AliasedRegs = [this] (uint32_t Unit, BitVector &Regs) {
- for (MCRegUnitRootIterator R(Unit, &PRI.getTRI()); R.isValid(); ++R)
- for (MCSuperRegIterator S(*R, &PRI.getTRI(), true); S.isValid(); ++S)
- Regs.set(*S);
- };
-
// Find the set of all registers that are aliased to all the units
// in this aggregate.
// Get all the registers aliased to the first unit in the bit vector.
- BitVector Regs(PRI.getTRI().getNumRegs());
- AliasedRegs(U, Regs);
+ BitVector Regs = PRI.getUnitAliases(U);
U = Units.find_next(U);
// For each other unit, intersect it with the set of all registers
// aliased that unit.
while (U >= 0) {
- BitVector AR(PRI.getTRI().getNumRegs());
- AliasedRegs(U, AR);
- Regs &= AR;
+ Regs &= PRI.getUnitAliases(U);
U = Units.find_next(U);
}
@@ -378,3 +374,8 @@ RegisterAggr::rr_iterator::rr_iterator(const RegisterAggr &RG,
Pos = End ? Masks.end() : Masks.begin();
Index = End ? Masks.size() : 0;
}
+
+raw_ostream &rdf::operator<<(raw_ostream &OS, const RegisterAggr &A) {
+ A.print(OS);
+ return OS;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
index 5bd8b4b8e27f..d16e90a7e0b4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -29,7 +29,7 @@ static bool isValidRegUse(const MachineOperand &MO) {
return isValidReg(MO) && MO.isUse();
}
-static bool isValidRegUseOf(const MachineOperand &MO, int PhysReg) {
+static bool isValidRegUseOf(const MachineOperand &MO, MCRegister PhysReg) {
return isValidRegUse(MO) && MO.getReg() == PhysReg;
}
@@ -37,7 +37,7 @@ static bool isValidRegDef(const MachineOperand &MO) {
return isValidReg(MO) && MO.isDef();
}
-static bool isValidRegDefOf(const MachineOperand &MO, int PhysReg) {
+static bool isValidRegDefOf(const MachineOperand &MO, MCRegister PhysReg) {
return isValidRegDef(MO) && MO.getReg() == PhysReg;
}
@@ -121,7 +121,8 @@ void ReachingDefAnalysis::processDefs(MachineInstr *MI) {
for (auto &MO : MI->operands()) {
if (!isValidRegDef(MO))
continue;
- for (MCRegUnitIterator Unit(MO.getReg(), TRI); Unit.isValid(); ++Unit) {
+ for (MCRegUnitIterator Unit(MO.getReg().asMCReg(), TRI); Unit.isValid();
+ ++Unit) {
// This instruction explicitly defines the current reg unit.
LLVM_DEBUG(dbgs() << printReg(*Unit, TRI) << ":\t" << CurInstr
<< '\t' << *MI);
@@ -143,10 +144,9 @@ void ReachingDefAnalysis::reprocessBasicBlock(MachineBasicBlock *MBB) {
"Unexpected basic block number.");
// Count number of non-debug instructions for end of block adjustment.
- int NumInsts = 0;
- for (const MachineInstr &MI : *MBB)
- if (!MI.isDebugInstr())
- NumInsts++;
+ auto NonDbgInsts =
+ instructionsWithoutDebug(MBB->instr_begin(), MBB->instr_end());
+ int NumInsts = std::distance(NonDbgInsts.begin(), NonDbgInsts.end());
// When reprocessing a block, the only thing we need to do is check whether
// there is now a more recent incoming reaching definition from a predecessor.
@@ -197,10 +197,9 @@ void ReachingDefAnalysis::processBasicBlock(
}
enterBasicBlock(MBB);
- for (MachineInstr &MI : *MBB) {
- if (!MI.isDebugInstr())
- processDefs(&MI);
- }
+ for (MachineInstr &MI :
+ instructionsWithoutDebug(MBB->instr_begin(), MBB->instr_end()))
+ processDefs(&MI);
leaveBasicBlock(MBB);
}
@@ -254,7 +253,8 @@ void ReachingDefAnalysis::traverse() {
#endif
}
-int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, int PhysReg) const {
+int ReachingDefAnalysis::getReachingDef(MachineInstr *MI,
+ MCRegister PhysReg) const {
   assert(InstIds.count(MI) && "Unexpected machine instruction.");
int InstId = InstIds.lookup(MI);
int DefRes = ReachingDefDefaultVal;
@@ -273,13 +273,16 @@ int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, int PhysReg) const {
return LatestDef;
}
-MachineInstr* ReachingDefAnalysis::getReachingLocalMIDef(MachineInstr *MI,
- int PhysReg) const {
- return getInstFromId(MI->getParent(), getReachingDef(MI, PhysReg));
+MachineInstr *
+ReachingDefAnalysis::getReachingLocalMIDef(MachineInstr *MI,
+ MCRegister PhysReg) const {
+ return hasLocalDefBefore(MI, PhysReg)
+ ? getInstFromId(MI->getParent(), getReachingDef(MI, PhysReg))
+ : nullptr;
}
bool ReachingDefAnalysis::hasSameReachingDef(MachineInstr *A, MachineInstr *B,
- int PhysReg) const {
+ MCRegister PhysReg) const {
MachineBasicBlock *ParentA = A->getParent();
MachineBasicBlock *ParentB = B->getParent();
if (ParentA != ParentB)
@@ -307,18 +310,19 @@ MachineInstr *ReachingDefAnalysis::getInstFromId(MachineBasicBlock *MBB,
return nullptr;
}
-int
-ReachingDefAnalysis::getClearance(MachineInstr *MI, MCPhysReg PhysReg) const {
+int ReachingDefAnalysis::getClearance(MachineInstr *MI,
+ MCRegister PhysReg) const {
   assert(InstIds.count(MI) && "Unexpected machine instruction.");
return InstIds.lookup(MI) - getReachingDef(MI, PhysReg);
}
-bool
-ReachingDefAnalysis::hasLocalDefBefore(MachineInstr *MI, int PhysReg) const {
+bool ReachingDefAnalysis::hasLocalDefBefore(MachineInstr *MI,
+ MCRegister PhysReg) const {
return getReachingDef(MI, PhysReg) >= 0;
}
-void ReachingDefAnalysis::getReachingLocalUses(MachineInstr *Def, int PhysReg,
+void ReachingDefAnalysis::getReachingLocalUses(MachineInstr *Def,
+ MCRegister PhysReg,
InstSet &Uses) const {
MachineBasicBlock *MBB = Def->getParent();
MachineBasicBlock::iterator MI = MachineBasicBlock::iterator(Def);
@@ -342,12 +346,11 @@ void ReachingDefAnalysis::getReachingLocalUses(MachineInstr *Def, int PhysReg,
}
}
-bool
-ReachingDefAnalysis::getLiveInUses(MachineBasicBlock *MBB, int PhysReg,
- InstSet &Uses) const {
- for (auto &MI : *MBB) {
- if (MI.isDebugInstr())
- continue;
+bool ReachingDefAnalysis::getLiveInUses(MachineBasicBlock *MBB,
+ MCRegister PhysReg,
+ InstSet &Uses) const {
+ for (MachineInstr &MI :
+ instructionsWithoutDebug(MBB->instr_begin(), MBB->instr_end())) {
for (auto &MO : MI.operands()) {
if (!isValidRegUseOf(MO, PhysReg))
continue;
@@ -356,12 +359,14 @@ ReachingDefAnalysis::getLiveInUses(MachineBasicBlock *MBB, int PhysReg,
Uses.insert(&MI);
}
}
- return isReachingDefLiveOut(&MBB->back(), PhysReg);
+ auto Last = MBB->getLastNonDebugInstr();
+ if (Last == MBB->end())
+ return true;
+ return isReachingDefLiveOut(&*Last, PhysReg);
}
-void
-ReachingDefAnalysis::getGlobalUses(MachineInstr *MI, int PhysReg,
- InstSet &Uses) const {
+void ReachingDefAnalysis::getGlobalUses(MachineInstr *MI, MCRegister PhysReg,
+ InstSet &Uses) const {
MachineBasicBlock *MBB = MI->getParent();
// Collect the uses that each def touches within the block.
@@ -372,9 +377,7 @@ ReachingDefAnalysis::getGlobalUses(MachineInstr *MI, int PhysReg,
if (LiveOut != MI)
return;
- SmallVector<MachineBasicBlock*, 4> ToVisit;
- ToVisit.insert(ToVisit.begin(), MBB->successors().begin(),
- MBB->successors().end());
+ SmallVector<MachineBasicBlock *, 4> ToVisit(MBB->successors());
SmallPtrSet<MachineBasicBlock*, 4>Visited;
while (!ToVisit.empty()) {
MachineBasicBlock *MBB = ToVisit.back();
@@ -382,22 +385,33 @@ ReachingDefAnalysis::getGlobalUses(MachineInstr *MI, int PhysReg,
if (Visited.count(MBB) || !MBB->isLiveIn(PhysReg))
continue;
if (getLiveInUses(MBB, PhysReg, Uses))
- ToVisit.insert(ToVisit.end(), MBB->successors().begin(),
- MBB->successors().end());
+ llvm::append_range(ToVisit, MBB->successors());
Visited.insert(MBB);
}
}
}
-void ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB, int PhysReg,
- InstSet &Defs) const {
+void ReachingDefAnalysis::getGlobalReachingDefs(MachineInstr *MI,
+ MCRegister PhysReg,
+ InstSet &Defs) const {
+ if (auto *Def = getUniqueReachingMIDef(MI, PhysReg)) {
+ Defs.insert(Def);
+ return;
+ }
+
+ for (auto *MBB : MI->getParent()->predecessors())
+ getLiveOuts(MBB, PhysReg, Defs);
+}
+
+void ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB,
+ MCRegister PhysReg, InstSet &Defs) const {
SmallPtrSet<MachineBasicBlock*, 2> VisitedBBs;
getLiveOuts(MBB, PhysReg, Defs, VisitedBBs);
}
-void
-ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB, int PhysReg,
- InstSet &Defs, BlockSet &VisitedBBs) const {
+void ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB,
+ MCRegister PhysReg, InstSet &Defs,
+ BlockSet &VisitedBBs) const {
if (VisitedBBs.count(MBB))
return;
@@ -414,26 +428,25 @@ ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB, int PhysReg,
getLiveOuts(Pred, PhysReg, Defs, VisitedBBs);
}
-MachineInstr *ReachingDefAnalysis::getUniqueReachingMIDef(MachineInstr *MI,
- int PhysReg) const {
+MachineInstr *
+ReachingDefAnalysis::getUniqueReachingMIDef(MachineInstr *MI,
+ MCRegister PhysReg) const {
// If there's a local def before MI, return it.
MachineInstr *LocalDef = getReachingLocalMIDef(MI, PhysReg);
if (LocalDef && InstIds.lookup(LocalDef) < InstIds.lookup(MI))
return LocalDef;
- SmallPtrSet<MachineBasicBlock*, 4> VisitedBBs;
SmallPtrSet<MachineInstr*, 2> Incoming;
- for (auto *Pred : MI->getParent()->predecessors())
- getLiveOuts(Pred, PhysReg, Incoming, VisitedBBs);
-
- // If we have a local def and an incoming instruction, then there's not a
- // unique instruction def.
- if (!Incoming.empty() && LocalDef)
- return nullptr;
- else if (Incoming.size() == 1)
+ MachineBasicBlock *Parent = MI->getParent();
+ for (auto *Pred : Parent->predecessors())
+ getLiveOuts(Pred, PhysReg, Incoming);
+
+  // Check that we have a single incoming value and that it does not
+  // come from the same block as MI, since that would mean the def is
+  // executed after MI.
+ if (Incoming.size() == 1 && (*Incoming.begin())->getParent() != Parent)
return *Incoming.begin();
- else
- return LocalDef;
+ return nullptr;
}
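The new uniqueness rule can be restated standalone: a single incoming live-out def counts only if it comes from a different block than the query, since a same-block def would only reach the query around a back edge. A toy sketch (hypothetical types):

    #include <set>

    struct ToyInstr { int Block; };

    // Return the unique incoming def, or nullptr if there is none or it
    // originates in the querying instruction's own block.
    const ToyInstr *uniqueIncoming(const std::set<const ToyInstr *> &Incoming,
                                   int QueryBlock) {
      if (Incoming.size() == 1 && (*Incoming.begin())->Block != QueryBlock)
        return *Incoming.begin();
      return nullptr;
    }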
MachineInstr *ReachingDefAnalysis::getMIOperand(MachineInstr *MI,
@@ -448,7 +461,8 @@ MachineInstr *ReachingDefAnalysis::getMIOperand(MachineInstr *MI,
return getUniqueReachingMIDef(MI, MO.getReg());
}
-bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, int PhysReg) const {
+bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI,
+ MCRegister PhysReg) const {
MachineBasicBlock *MBB = MI->getParent();
LivePhysRegs LiveRegs(*TRI);
LiveRegs.addLiveOuts(*MBB);
@@ -459,18 +473,21 @@ bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, int PhysReg) const {
// Walk backwards through the block to see if the register is live at some
// point.
- for (auto Last = MBB->rbegin(), End = MBB->rend(); Last != End; ++Last) {
- LiveRegs.stepBackward(*Last);
+ for (MachineInstr &Last :
+ instructionsWithoutDebug(MBB->instr_rbegin(), MBB->instr_rend())) {
+ LiveRegs.stepBackward(Last);
if (LiveRegs.contains(PhysReg))
- return InstIds.lookup(&*Last) > InstIds.lookup(MI);
+ return InstIds.lookup(&Last) > InstIds.lookup(MI);
}
return false;
}
bool ReachingDefAnalysis::isRegDefinedAfter(MachineInstr *MI,
- int PhysReg) const {
+ MCRegister PhysReg) const {
MachineBasicBlock *MBB = MI->getParent();
- if (getReachingDef(MI, PhysReg) != getReachingDef(&MBB->back(), PhysReg))
+ auto Last = MBB->getLastNonDebugInstr();
+ if (Last != MBB->end() &&
+ getReachingDef(MI, PhysReg) != getReachingDef(&*Last, PhysReg))
return true;
if (auto *Def = getLocalLiveOutMIDef(MBB, PhysReg))
@@ -479,17 +496,17 @@ bool ReachingDefAnalysis::isRegDefinedAfter(MachineInstr *MI,
return false;
}
-bool
-ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI, int PhysReg) const {
+bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI,
+ MCRegister PhysReg) const {
MachineBasicBlock *MBB = MI->getParent();
LivePhysRegs LiveRegs(*TRI);
LiveRegs.addLiveOuts(*MBB);
if (!LiveRegs.contains(PhysReg))
return false;
- MachineInstr *Last = &MBB->back();
+ auto Last = MBB->getLastNonDebugInstr();
int Def = getReachingDef(MI, PhysReg);
- if (getReachingDef(Last, PhysReg) != Def)
+ if (Last != MBB->end() && getReachingDef(&*Last, PhysReg) != Def)
return false;
// Finally check that the last instruction doesn't redefine the register.
@@ -500,18 +517,22 @@ ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI, int PhysReg) const {
return true;
}
-MachineInstr* ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB,
- int PhysReg) const {
+MachineInstr *
+ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB,
+ MCRegister PhysReg) const {
LivePhysRegs LiveRegs(*TRI);
LiveRegs.addLiveOuts(*MBB);
if (!LiveRegs.contains(PhysReg))
return nullptr;
- MachineInstr *Last = &MBB->back();
- int Def = getReachingDef(Last, PhysReg);
+ auto Last = MBB->getLastNonDebugInstr();
+ if (Last == MBB->end())
+ return nullptr;
+
+ int Def = getReachingDef(&*Last, PhysReg);
for (auto &MO : Last->operands())
if (isValidRegDefOf(MO, PhysReg))
- return Last;
+ return &*Last;
return Def < 0 ? nullptr : getInstFromId(MBB, Def);
}
@@ -528,7 +549,7 @@ static bool mayHaveSideEffects(MachineInstr &MI) {
template<typename Iterator>
bool ReachingDefAnalysis::isSafeToMove(MachineInstr *From,
MachineInstr *To) const {
- if (From->getParent() != To->getParent())
+ if (From->getParent() != To->getParent() || From == To)
return false;
SmallSet<int, 2> Defs;
@@ -557,12 +578,22 @@ bool ReachingDefAnalysis::isSafeToMove(MachineInstr *From,
bool ReachingDefAnalysis::isSafeToMoveForwards(MachineInstr *From,
MachineInstr *To) const {
- return isSafeToMove<MachineBasicBlock::reverse_iterator>(From, To);
+ using Iterator = MachineBasicBlock::iterator;
+ // Walk forwards until we find the instruction.
+ for (auto I = Iterator(From), E = From->getParent()->end(); I != E; ++I)
+ if (&*I == To)
+ return isSafeToMove<Iterator>(From, To);
+ return false;
}
bool ReachingDefAnalysis::isSafeToMoveBackwards(MachineInstr *From,
MachineInstr *To) const {
- return isSafeToMove<MachineBasicBlock::iterator>(From, To);
+ using Iterator = MachineBasicBlock::reverse_iterator;
+ // Walk backwards until we find the instruction.
+ for (auto I = Iterator(From), E = From->getParent()->rend(); I != E; ++I)
+ if (&*I == To)
+ return isSafeToMove<Iterator>(From, To);
+ return false;
}
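Both wrappers now verify direction before delegating to the templated walk. A toy restatement of that check (hypothetical types):

    #include <list>

    struct ToyInstr { int Id; };

    // Report a forward move as plausible only if To appears after From in the
    // block; the templated safety walk then scans the correct range.
    bool occursAfter(const std::list<ToyInstr> &Block, const ToyInstr *From,
                     const ToyInstr *To) {
      bool SeenFrom = false;
      for (const ToyInstr &I : Block) {
        if (&I == From)
          SeenFrom = true;
        else if (&I == To)
          return SeenFrom;
      }
      return false;
    }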
bool ReachingDefAnalysis::isSafeToRemove(MachineInstr *MI,
@@ -612,7 +643,10 @@ ReachingDefAnalysis::isSafeToRemove(MachineInstr *MI, InstSet &Visited,
void ReachingDefAnalysis::collectKilledOperands(MachineInstr *MI,
InstSet &Dead) const {
Dead.insert(MI);
- auto IsDead = [this, &Dead](MachineInstr *Def, int PhysReg) {
+ auto IsDead = [this, &Dead](MachineInstr *Def, MCRegister PhysReg) {
+ if (mayHaveSideEffects(*Def))
+ return false;
+
unsigned LiveDefs = 0;
for (auto &MO : Def->operands()) {
if (!isValidRegDef(MO))
@@ -642,18 +676,18 @@ void ReachingDefAnalysis::collectKilledOperands(MachineInstr *MI,
}
bool ReachingDefAnalysis::isSafeToDefRegAt(MachineInstr *MI,
- int PhysReg) const {
+ MCRegister PhysReg) const {
SmallPtrSet<MachineInstr*, 1> Ignore;
return isSafeToDefRegAt(MI, PhysReg, Ignore);
}
-bool ReachingDefAnalysis::isSafeToDefRegAt(MachineInstr *MI, int PhysReg,
+bool ReachingDefAnalysis::isSafeToDefRegAt(MachineInstr *MI, MCRegister PhysReg,
InstSet &Ignore) const {
// Check for any uses of the register after MI.
if (isRegUsedAfter(MI, PhysReg)) {
if (auto *Def = getReachingLocalMIDef(MI, PhysReg)) {
SmallPtrSet<MachineInstr*, 2> Uses;
- getReachingLocalUses(Def, PhysReg, Uses);
+ getGlobalUses(Def, PhysReg, Uses);
for (auto *Use : Uses)
if (!Ignore.count(Use))
return false;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp
index d22826853672..aa749ca43e74 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -73,7 +73,7 @@ void RegAllocBase::seedLiveRegs() {
NamedRegionTimer T("seed", "Seed Live Regs", TimerGroupName,
TimerGroupDescription, TimePassesIsEnabled);
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = Register::index2VirtReg(i);
+ Register Reg = Register::index2VirtReg(i);
if (MRI->reg_nodbg_empty(Reg))
continue;
enqueue(&LIS->getInterval(Reg));
@@ -87,13 +87,13 @@ void RegAllocBase::allocatePhysRegs() {
// Continue assigning vregs one at a time to available physical registers.
while (LiveInterval *VirtReg = dequeue()) {
- assert(!VRM->hasPhys(VirtReg->reg) && "Register already assigned");
+ assert(!VRM->hasPhys(VirtReg->reg()) && "Register already assigned");
// Unused registers can appear when the spiller coalesces snippets.
- if (MRI->reg_nodbg_empty(VirtReg->reg)) {
+ if (MRI->reg_nodbg_empty(VirtReg->reg())) {
LLVM_DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n');
aboutToRemoveInterval(*VirtReg);
- LIS->removeInterval(VirtReg->reg);
+ LIS->removeInterval(VirtReg->reg());
continue;
}
@@ -104,21 +104,22 @@ void RegAllocBase::allocatePhysRegs() {
// register if possible and populate a list of new live intervals that
// result from splitting.
LLVM_DEBUG(dbgs() << "\nselectOrSplit "
- << TRI->getRegClassName(MRI->getRegClass(VirtReg->reg))
- << ':' << *VirtReg << " w=" << VirtReg->weight << '\n');
+ << TRI->getRegClassName(MRI->getRegClass(VirtReg->reg()))
+ << ':' << *VirtReg << " w=" << VirtReg->weight() << '\n');
using VirtRegVec = SmallVector<Register, 4>;
VirtRegVec SplitVRegs;
- unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs);
+ MCRegister AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs);
if (AvailablePhysReg == ~0u) {
// selectOrSplit failed to find a register!
// Probably caused by an inline asm.
MachineInstr *MI = nullptr;
for (MachineRegisterInfo::reg_instr_iterator
- I = MRI->reg_instr_begin(VirtReg->reg), E = MRI->reg_instr_end();
- I != E; ) {
+ I = MRI->reg_instr_begin(VirtReg->reg()),
+ E = MRI->reg_instr_end();
+ I != E;) {
MI = &*(I++);
if (MI->isInlineAsm())
break;
@@ -133,28 +134,29 @@ void RegAllocBase::allocatePhysRegs() {
report_fatal_error("ran out of registers during register allocation");
}
// Keep going after reporting the error.
- VRM->assignVirt2Phys(VirtReg->reg,
- RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front());
+ VRM->assignVirt2Phys(
+ VirtReg->reg(),
+ RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg())).front());
continue;
}
if (AvailablePhysReg)
Matrix->assign(*VirtReg, AvailablePhysReg);
- for (unsigned Reg : SplitVRegs) {
+ for (Register Reg : SplitVRegs) {
assert(LIS->hasInterval(Reg));
LiveInterval *SplitVirtReg = &LIS->getInterval(Reg);
- assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned");
- if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) {
+ assert(!VRM->hasPhys(SplitVirtReg->reg()) && "Register already assigned");
+ if (MRI->reg_nodbg_empty(SplitVirtReg->reg())) {
assert(SplitVirtReg->empty() && "Non-empty but used interval");
LLVM_DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n');
aboutToRemoveInterval(*SplitVirtReg);
- LIS->removeInterval(SplitVirtReg->reg);
+ LIS->removeInterval(SplitVirtReg->reg());
continue;
}
LLVM_DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n");
- assert(Register::isVirtualRegister(SplitVirtReg->reg) &&
+ assert(Register::isVirtualRegister(SplitVirtReg->reg()) &&
"expect split value in virtual register");
enqueue(SplitVirtReg);
++NumNewQueued;
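These hunks assume the LiveInterval field-to-accessor migration in this release: the public reg and weight members become reg() and weight() accessors. A toy model of the shape, not LLVM's actual class:

    class ToyLiveInterval {
      unsigned Reg;
      float Weight;

    public:
      ToyLiveInterval(unsigned R, float W) : Reg(R), Weight(W) {}
      unsigned reg() const { return Reg; }
      float weight() const { return Weight; }
    };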
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h
index 8e931eaae99a..3144605345e9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h
@@ -101,8 +101,8 @@ protected:
  // Each call must guarantee forward progress by returning an available PhysReg
// or new set of split live virtual registers. It is up to the splitter to
// converge quickly toward fully spilled live ranges.
- virtual Register selectOrSplit(LiveInterval &VirtReg,
- SmallVectorImpl<Register> &splitLVRs) = 0;
+ virtual MCRegister selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<Register> &splitLVRs) = 0;
// Use this group name for NamedRegionTimer.
static const char TimerGroupName[];
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp
index 5009bcc0a397..8f2cb48c5d69 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -46,7 +46,7 @@ static RegisterRegAlloc basicRegAlloc("basic", "basic register allocator",
namespace {
struct CompSpillWeight {
bool operator()(LiveInterval *A, LiveInterval *B) const {
- return A->weight < B->weight;
+ return A->weight() < B->weight();
}
};
}
@@ -72,8 +72,8 @@ class RABasic : public MachineFunctionPass,
// selectOrSplit().
BitVector UsableRegs;
- bool LRE_CanEraseVirtReg(unsigned) override;
- void LRE_WillShrinkVirtReg(unsigned) override;
+ bool LRE_CanEraseVirtReg(Register) override;
+ void LRE_WillShrinkVirtReg(Register) override;
public:
RABasic();
@@ -100,8 +100,8 @@ public:
return LI;
}
- Register selectOrSplit(LiveInterval &VirtReg,
- SmallVectorImpl<Register> &SplitVRegs) override;
+ MCRegister selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<Register> &SplitVRegs) override;
/// Perform register allocation.
bool runOnMachineFunction(MachineFunction &mf) override;
@@ -111,10 +111,15 @@ public:
MachineFunctionProperties::Property::NoPHIs);
}
+ MachineFunctionProperties getClearedProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::IsSSA);
+ }
+
// Helper for spilling all live virtual registers currently unified under preg
// that interfere with the most recently queried lvr. Return true if spilling
// was successful, and append any new spilled/split intervals to splitLVRs.
- bool spillInterferences(LiveInterval &VirtReg, Register PhysReg,
+ bool spillInterferences(LiveInterval &VirtReg, MCRegister PhysReg,
SmallVectorImpl<Register> &SplitVRegs);
static char ID;
@@ -141,7 +146,7 @@ INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
INITIALIZE_PASS_END(RABasic, "regallocbasic", "Basic Register Allocator", false,
false)
-bool RABasic::LRE_CanEraseVirtReg(unsigned VirtReg) {
+bool RABasic::LRE_CanEraseVirtReg(Register VirtReg) {
LiveInterval &LI = LIS->getInterval(VirtReg);
if (VRM->hasPhys(VirtReg)) {
Matrix->unassign(LI);
@@ -156,7 +161,7 @@ bool RABasic::LRE_CanEraseVirtReg(unsigned VirtReg) {
return false;
}
-void RABasic::LRE_WillShrinkVirtReg(unsigned VirtReg) {
+void RABasic::LRE_WillShrinkVirtReg(Register VirtReg) {
if (!VRM->hasPhys(VirtReg))
return;
@@ -201,7 +206,7 @@ void RABasic::releaseMemory() {
// Spill or split all live virtual registers currently unified under PhysReg
// that interfere with VirtReg. The newly spilled or split live intervals are
// returned by appending them to SplitVRegs.
-bool RABasic::spillInterferences(LiveInterval &VirtReg, Register PhysReg,
+bool RABasic::spillInterferences(LiveInterval &VirtReg, MCRegister PhysReg,
SmallVectorImpl<Register> &SplitVRegs) {
// Record each interference and determine if all are spillable before mutating
// either the union or live intervals.
@@ -213,7 +218,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, Register PhysReg,
Q.collectInterferingVRegs();
for (unsigned i = Q.interferingVRegs().size(); i; --i) {
LiveInterval *Intf = Q.interferingVRegs()[i - 1];
- if (!Intf->isSpillable() || Intf->weight > VirtReg.weight)
+ if (!Intf->isSpillable() || Intf->weight() > VirtReg.weight())
return false;
Intfs.push_back(Intf);
}
@@ -227,7 +232,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, Register PhysReg,
LiveInterval &Spill = *Intfs[i];
// Skip duplicates.
- if (!VRM->hasPhys(Spill.reg))
+ if (!VRM->hasPhys(Spill.reg()))
continue;
// Deallocate the interfering vreg by removing it from the union.
@@ -253,14 +258,16 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, Register PhysReg,
// |vregs| * |machineregs|. And since the number of interference tests is
// minimal, there is no value in caching them outside the scope of
// selectOrSplit().
-Register RABasic::selectOrSplit(LiveInterval &VirtReg,
- SmallVectorImpl<Register> &SplitVRegs) {
+MCRegister RABasic::selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<Register> &SplitVRegs) {
// Populate a list of physical register spill candidates.
- SmallVector<Register, 8> PhysRegSpillCands;
+ SmallVector<MCRegister, 8> PhysRegSpillCands;
// Check for an available register in this class.
- AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix);
- while (Register PhysReg = Order.next()) {
+ auto Order =
+ AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
+ for (MCRegister PhysReg : Order) {
+ assert(PhysReg.isValid());
// Check for interference in PhysReg
switch (Matrix->checkInterference(VirtReg, PhysReg)) {
case LiveRegMatrix::IK_Free:
@@ -279,8 +286,9 @@ Register RABasic::selectOrSplit(LiveInterval &VirtReg,
}
// Try to spill another interfering reg with less spill weight.
- for (SmallVectorImpl<Register>::iterator PhysRegI = PhysRegSpillCands.begin(),
- PhysRegE = PhysRegSpillCands.end(); PhysRegI != PhysRegE; ++PhysRegI) {
+ for (auto PhysRegI = PhysRegSpillCands.begin(),
+ PhysRegE = PhysRegSpillCands.end();
+ PhysRegI != PhysRegE; ++PhysRegI) {
if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs))
continue;
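The loop above assumes AllocationOrder now models an iterable range rather than a next()-with-zero-sentinel protocol. A toy stand-in for that interface (hypothetical names):

    #include <vector>

    // Iteration yields candidate physregs in priority order (hints first),
    // so callers can use a plain range-for.
    struct ToyAllocationOrder {
      std::vector<unsigned> Regs;
      std::vector<unsigned>::const_iterator begin() const {
        return Regs.begin();
      }
      std::vector<unsigned>::const_iterator end() const { return Regs.end(); }
    };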
@@ -310,10 +318,9 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {
RegAllocBase::init(getAnalysis<VirtRegMap>(),
getAnalysis<LiveIntervals>(),
getAnalysis<LiveRegMatrix>());
-
- calculateSpillWeightsAndHints(*LIS, *MF, VRM,
- getAnalysis<MachineLoopInfo>(),
- getAnalysis<MachineBlockFrequencyInfo>());
+ VirtRegAuxInfo VRAI(*MF, *LIS, *VRM, getAnalysis<MachineLoopInfo>(),
+ getAnalysis<MachineBlockFrequencyInfo>());
+ VRAI.calculateSpillWeightsAndHints();
SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
index cf3eaba23bee..6e548d4a93c8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -56,6 +56,10 @@ STATISTIC(NumStores, "Number of stores added");
STATISTIC(NumLoads , "Number of loads added");
STATISTIC(NumCoalesced, "Number of copies coalesced");
+// FIXME: Remove this switch when all testcases are fixed!
+static cl::opt<bool> IgnoreMissingDefs("rafast-ignore-missing-defs",
+ cl::Hidden);
+
static RegisterRegAlloc
fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator);
@@ -85,8 +89,9 @@ namespace {
MachineInstr *LastUse = nullptr; ///< Last instr to use reg.
Register VirtReg; ///< Virtual register number.
MCPhysReg PhysReg = 0; ///< Currently held here.
- unsigned short LastOpNum = 0; ///< OpNum on LastUse.
- bool Dirty = false; ///< Register needs spill.
+ bool LiveOut = false; ///< Register is possibly live out.
+ bool Reloaded = false; ///< Register was reloaded.
+ bool Error = false; ///< Could not allocate.
explicit LiveReg(Register VirtReg) : VirtReg(VirtReg) {}
@@ -100,44 +105,51 @@ namespace {
/// available in a physical register.
LiveRegMap LiveVirtRegs;
+ /// Stores assigned virtual registers present in the bundle MI.
+ DenseMap<Register, MCPhysReg> BundleVirtRegsMap;
+
DenseMap<unsigned, SmallVector<MachineInstr *, 2>> LiveDbgValueMap;
+ /// List of DBG_VALUE that we encountered without the vreg being assigned
+ /// because they were placed after the last use of the vreg.
+ DenseMap<unsigned, SmallVector<MachineInstr *, 1>> DanglingDbgValues;
/// Has a bit set for every virtual register for which it was determined
/// that it is alive across blocks.
BitVector MayLiveAcrossBlocks;
- /// State of a physical register.
- enum RegState {
- /// A disabled register is not available for allocation, but an alias may
- /// be in use. A register can only be moved out of the disabled state if
- /// all aliases are disabled.
- regDisabled,
-
+ /// State of a register unit.
+ enum RegUnitState {
/// A free register is not currently in use and can be allocated
/// immediately without checking aliases.
regFree,
- /// A reserved register has been assigned explicitly (e.g., setting up a
- /// call parameter), and it remains reserved until it is used.
- regReserved
+ /// A pre-assigned register has been assigned before register allocation
+ /// (e.g., setting up a call parameter).
+ regPreAssigned,
+
+ /// Used temporarily in reloadAtBegin() to mark register units that are
+ /// live-in to the basic block.
+ regLiveIn,
    /// A register state may also be a virtual register number, indicating
    /// that the physical register is currently allocated to a virtual
/// register. In that case, LiveVirtRegs contains the inverse mapping.
};
- /// Maps each physical register to a RegState enum or a virtual register.
- std::vector<unsigned> PhysRegState;
+ /// Maps each physical register to a RegUnitState enum or virtual register.
+ std::vector<unsigned> RegUnitStates;
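A sketch of what the new table holds (toy types; the real code relies on LLVM virtual register numbers having the top bit set, so small state values cannot collide with them):

    #include <vector>

    enum ToyUnitState : unsigned { regFree = 0, regPreAssigned, regLiveIn };

    struct ToyUnitTable {
      // Per register unit: one of the states above, or a virtual register
      // number when the unit is allocated to a vreg.
      std::vector<unsigned> RegUnitStates;

      bool isFree(unsigned Unit) const { return RegUnitStates[Unit] == regFree; }
      bool holdsVirtReg(unsigned Unit) const {
        return RegUnitStates[Unit] > regLiveIn;
      }
    };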
- SmallVector<Register, 16> VirtDead;
SmallVector<MachineInstr *, 32> Coalesced;
using RegUnitSet = SparseSet<uint16_t, identity<uint16_t>>;
/// Set of register units that are used in the current instruction, and so
/// cannot be allocated.
RegUnitSet UsedInInstr;
+ RegUnitSet PhysRegUses;
+ SmallVector<uint16_t, 8> DefOperandIndexes;
void setPhysRegState(MCPhysReg PhysReg, unsigned NewState);
+ bool isPhysRegFree(MCPhysReg PhysReg) const;
/// Mark a physreg as used in this instruction.
void markRegUsedInInstr(MCPhysReg PhysReg) {
@@ -146,13 +158,29 @@ namespace {
}
/// Check if a physreg or any of its aliases are used in this instruction.
- bool isRegUsedInInstr(MCPhysReg PhysReg) const {
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
+ bool isRegUsedInInstr(MCPhysReg PhysReg, bool LookAtPhysRegUses) const {
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
if (UsedInInstr.count(*Units))
return true;
+ if (LookAtPhysRegUses && PhysRegUses.count(*Units))
+ return true;
+ }
return false;
}
+ /// Mark physical register as being used in a register use operand.
+ /// This is only used by the special livethrough handling code.
+ void markPhysRegUsedInInstr(MCPhysReg PhysReg) {
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
+ PhysRegUses.insert(*Units);
+ }
+
+ /// Remove mark of physical register being used in the instruction.
+ void unmarkRegUsedInInstr(MCPhysReg PhysReg) {
+ for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
+ UsedInInstr.erase(*Units);
+ }
+
enum : unsigned {
spillClean = 50,
spillDirty = 100,
@@ -178,27 +206,29 @@ namespace {
MachineFunctionProperties::Property::NoVRegs);
}
+ MachineFunctionProperties getClearedProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::IsSSA);
+ }
+
private:
bool runOnMachineFunction(MachineFunction &MF) override;
void allocateBasicBlock(MachineBasicBlock &MBB);
+
+ void addRegClassDefCounts(std::vector<unsigned> &RegClassDefCounts,
+ Register Reg) const;
+
void allocateInstruction(MachineInstr &MI);
void handleDebugValue(MachineInstr &MI);
- void handleThroughOperands(MachineInstr &MI,
- SmallVectorImpl<Register> &VirtDead);
- bool isLastUseOfLocalReg(const MachineOperand &MO) const;
-
- void addKillFlag(const LiveReg &LRI);
- void killVirtReg(LiveReg &LR);
- void killVirtReg(Register VirtReg);
- void spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR);
- void spillVirtReg(MachineBasicBlock::iterator MI, Register VirtReg);
-
- void usePhysReg(MachineOperand &MO);
- void definePhysReg(MachineBasicBlock::iterator MI, MCPhysReg PhysReg,
- RegState NewState);
+ void handleBundle(MachineInstr &MI);
+
+ bool usePhysReg(MachineInstr &MI, MCPhysReg PhysReg);
+ bool definePhysReg(MachineInstr &MI, MCPhysReg PhysReg);
+ bool displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg);
+ void freePhysReg(MCPhysReg PhysReg);
+
unsigned calcSpillCost(MCPhysReg PhysReg) const;
- void assignVirtToPhysReg(LiveReg &, MCPhysReg PhysReg);
LiveRegMap::iterator findLiveVirtReg(Register VirtReg) {
return LiveVirtRegs.find(Register::virtReg2Index(VirtReg));
@@ -208,28 +238,38 @@ namespace {
return LiveVirtRegs.find(Register::virtReg2Index(VirtReg));
}
- void allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint);
+ void assignVirtToPhysReg(MachineInstr &MI, LiveReg &, MCPhysReg PhysReg);
+ void allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint,
+ bool LookAtPhysRegUses = false);
void allocVirtRegUndef(MachineOperand &MO);
- MCPhysReg defineVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg,
- Register Hint);
- LiveReg &reloadVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg,
- Register Hint);
- void spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut);
- bool setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg);
+ void assignDanglingDebugValues(MachineInstr &Def, Register VirtReg,
+ MCPhysReg Reg);
+ void defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum,
+ Register VirtReg);
+ void defineVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg,
+ bool LookAtPhysRegUses = false);
+ void useVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg);
+
+ MachineBasicBlock::iterator
+ getMBBBeginInsertionPoint(MachineBasicBlock &MBB,
+ SmallSet<Register, 2> &PrologLiveIns) const;
+
+ void reloadAtBegin(MachineBasicBlock &MBB);
+ void setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg);
Register traceCopies(Register VirtReg) const;
Register traceCopyChain(Register Reg) const;
int getStackSpaceFor(Register VirtReg);
void spill(MachineBasicBlock::iterator Before, Register VirtReg,
- MCPhysReg AssignedReg, bool Kill);
+ MCPhysReg AssignedReg, bool Kill, bool LiveOut);
void reload(MachineBasicBlock::iterator Before, Register VirtReg,
MCPhysReg PhysReg);
bool mayLiveOut(Register VirtReg);
bool mayLiveIn(Register VirtReg);
- void dumpState();
+ void dumpState() const;
};
} // end anonymous namespace
@@ -240,7 +280,16 @@ INITIALIZE_PASS(RegAllocFast, "regallocfast", "Fast Register Allocator", false,
false)
void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) {
- PhysRegState[PhysReg] = NewState;
+ for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI)
+ RegUnitStates[*UI] = NewState;
+}
+
+bool RegAllocFast::isPhysRegFree(MCPhysReg PhysReg) const {
+ for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
+ if (RegUnitStates[*UI] != regFree)
+ return false;
+ }
+ return true;
}
/// This allocates space for the specified virtual register to be held on the
@@ -263,6 +312,20 @@ int RegAllocFast::getStackSpaceFor(Register VirtReg) {
return FrameIdx;
}
+static bool dominates(MachineBasicBlock &MBB,
+ MachineBasicBlock::const_iterator A,
+ MachineBasicBlock::const_iterator B) {
+ auto MBBEnd = MBB.end();
+ if (B == MBBEnd)
+ return true;
+
+ MachineBasicBlock::const_iterator I = MBB.begin();
+ for (; &*I != A && &*I != B; ++I)
+ ;
+
+ return &*I == A;
+}
+
/// Returns false if \p VirtReg is known to not live out of the current block.
bool RegAllocFast::mayLiveOut(Register VirtReg) {
if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg))) {
@@ -270,23 +333,38 @@ bool RegAllocFast::mayLiveOut(Register VirtReg) {
return !MBB->succ_empty();
}
- // If this block loops back to itself, it would be necessary to check whether
- // the use comes after the def.
+ const MachineInstr *SelfLoopDef = nullptr;
+
+ // If this block loops back to itself, it is necessary to check whether the
+ // use comes after the def.
if (MBB->isSuccessor(MBB)) {
- MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
- return true;
+ SelfLoopDef = MRI->getUniqueVRegDef(VirtReg);
+ if (!SelfLoopDef) {
+ MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
+ return true;
+ }
}
// See if the first \p Limit uses of the register are all in the current
// block.
static const unsigned Limit = 8;
unsigned C = 0;
- for (const MachineInstr &UseInst : MRI->reg_nodbg_instructions(VirtReg)) {
+ for (const MachineInstr &UseInst : MRI->use_nodbg_instructions(VirtReg)) {
if (UseInst.getParent() != MBB || ++C >= Limit) {
MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
// Cannot be live-out if there are no successors.
return !MBB->succ_empty();
}
+
+ if (SelfLoopDef) {
+ // Try to handle some simple cases to avoid spilling and reloading every
+ // value inside a self looping block.
+ if (SelfLoopDef == &UseInst ||
+ !dominates(*MBB, SelfLoopDef->getIterator(), UseInst.getIterator())) {
+ MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
+ return true;
+ }
+ }
}
return false;
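The dominates() scan used here can be restated on toy types: in a self-looping block, a value whose unique def precedes every use does not flow around the back edge:

    #include <list>

    struct ToyInstr { int Id; };

    // True if Def appears before Use in a linear walk of the block, i.e. the
    // use is reached directly rather than via the loop back edge.
    bool defDominatesUse(const std::list<ToyInstr> &Block, const ToyInstr *Def,
                         const ToyInstr *Use) {
      for (const ToyInstr &I : Block) {
        if (&I == Def)
          return true;
        if (&I == Use)
          return false;
      }
      return false;
    }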
@@ -313,7 +391,7 @@ bool RegAllocFast::mayLiveIn(Register VirtReg) {
/// Insert spill instruction for \p AssignedReg before \p Before. Update
/// DBG_VALUEs with \p VirtReg operands with the stack slot.
void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg,
- MCPhysReg AssignedReg, bool Kill) {
+ MCPhysReg AssignedReg, bool Kill, bool LiveOut) {
LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI)
<< " in " << printReg(AssignedReg, TRI));
int FI = getStackSpaceFor(VirtReg);
@@ -323,15 +401,32 @@ void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg,
TII->storeRegToStackSlot(*MBB, Before, AssignedReg, Kill, FI, &RC, TRI);
++NumStores;
- // If this register is used by DBG_VALUE then insert new DBG_VALUE to
- // identify spilled location as the place to find corresponding variable's
- // value.
+ MachineBasicBlock::iterator FirstTerm = MBB->getFirstTerminator();
+
+ // When we spill a virtual register, we will have spill instructions behind
+ // every definition of it, meaning we can switch all the DBG_VALUEs over
+ // to just reference the stack slot.
SmallVectorImpl<MachineInstr *> &LRIDbgValues = LiveDbgValueMap[VirtReg];
for (MachineInstr *DBG : LRIDbgValues) {
MachineInstr *NewDV = buildDbgValueForSpill(*MBB, Before, *DBG, FI);
assert(NewDV->getParent() == MBB && "dangling parent pointer");
(void)NewDV;
LLVM_DEBUG(dbgs() << "Inserting debug info due to spill:\n" << *NewDV);
+
+ if (LiveOut) {
+ // We need to insert a DBG_VALUE at the end of the block if the spill slot
+ // is live out, but there is another use of the value after the
+ // spill. This will allow LiveDebugValues to see the correct live out
+ // value to propagate to the successors.
+ MachineInstr *ClonedDV = MBB->getParent()->CloneMachineInstr(NewDV);
+ MBB->insert(FirstTerm, ClonedDV);
+ LLVM_DEBUG(dbgs() << "Cloning debug info due to live out spill\n");
+ }
+
+ // Rewrite unassigned dbg_values to use the stack slot.
+ MachineOperand &MO = DBG->getOperand(0);
+ if (MO.isReg() && MO.getReg() == 0)
+ updateDbgValueForSpill(*DBG, FI);
}
  // Now that this register is spilled, there should not be any DBG_VALUE
  // pointing to this register because they all point to the spilled value
@@ -350,100 +445,75 @@ void RegAllocFast::reload(MachineBasicBlock::iterator Before, Register VirtReg,
++NumLoads;
}
-/// Return true if MO is the only remaining reference to its virtual register,
-/// and it is guaranteed to be a block-local register.
-bool RegAllocFast::isLastUseOfLocalReg(const MachineOperand &MO) const {
- // If the register has ever been spilled or reloaded, we conservatively assume
- // it is a global register used in multiple blocks.
- if (StackSlotForVirtReg[MO.getReg()] != -1)
- return false;
-
- // Check that the use/def chain has exactly one operand - MO.
- MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(MO.getReg());
- if (&*I != &MO)
- return false;
- return ++I == MRI->reg_nodbg_end();
-}
+/// Get basic block begin insertion point.
+/// This is not just MBB.begin() because surprisingly we have EH_LABEL
+/// instructions marking the beginning of a basic block. This means we must insert
+/// new instructions after such labels...
+MachineBasicBlock::iterator
+RegAllocFast::getMBBBeginInsertionPoint(
+ MachineBasicBlock &MBB, SmallSet<Register, 2> &PrologLiveIns) const {
+ MachineBasicBlock::iterator I = MBB.begin();
+ while (I != MBB.end()) {
+ if (I->isLabel()) {
+ ++I;
+ continue;
+ }
-/// Set kill flags on last use of a virtual register.
-void RegAllocFast::addKillFlag(const LiveReg &LR) {
- if (!LR.LastUse) return;
- MachineOperand &MO = LR.LastUse->getOperand(LR.LastOpNum);
- if (MO.isUse() && !LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) {
- if (MO.getReg() == LR.PhysReg)
- MO.setIsKill();
- // else, don't do anything we are problably redefining a
- // subreg of this register and given we don't track which
- // lanes are actually dead, we cannot insert a kill flag here.
- // Otherwise we may end up in a situation like this:
- // ... = (MO) physreg:sub1, implicit killed physreg
- // ... <== Here we would allow later pass to reuse physreg:sub1
- // which is potentially wrong.
- // LR:sub0 = ...
- // ... = LR.sub1 <== This is going to use physreg:sub1
- }
-}
+ // Most reloads should be inserted after prolog instructions.
+ if (!TII->isBasicBlockPrologue(*I))
+ break;
-/// Mark virtreg as no longer available.
-void RegAllocFast::killVirtReg(LiveReg &LR) {
- addKillFlag(LR);
- assert(PhysRegState[LR.PhysReg] == LR.VirtReg &&
- "Broken RegState mapping");
- setPhysRegState(LR.PhysReg, regFree);
- LR.PhysReg = 0;
-}
+ // However if a prolog instruction reads a register that needs to be
+ // reloaded, the reload should be inserted before the prolog.
+ for (MachineOperand &MO : I->operands()) {
+ if (MO.isReg())
+ PrologLiveIns.insert(MO.getReg());
+ }
-/// Mark virtreg as no longer available.
-void RegAllocFast::killVirtReg(Register VirtReg) {
- assert(Register::isVirtualRegister(VirtReg) &&
- "killVirtReg needs a virtual register");
- LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
- if (LRI != LiveVirtRegs.end() && LRI->PhysReg)
- killVirtReg(*LRI);
-}
+ ++I;
+ }
-/// This method spills the value specified by VirtReg into the corresponding
-/// stack slot if needed.
-void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI,
- Register VirtReg) {
- assert(Register::isVirtualRegister(VirtReg) &&
- "Spilling a physical register is illegal!");
- LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
- assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
- "Spilling unmapped virtual register");
- spillVirtReg(MI, *LRI);
+ return I;
}
-/// Do the actual work of spilling.
-void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) {
- assert(PhysRegState[LR.PhysReg] == LR.VirtReg && "Broken RegState mapping");
+/// Reload all currently assigned virtual registers.
+void RegAllocFast::reloadAtBegin(MachineBasicBlock &MBB) {
+ if (LiveVirtRegs.empty())
+ return;
- if (LR.Dirty) {
- // If this physreg is used by the instruction, we want to kill it on the
- // instruction, not on the spill.
- bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI;
- LR.Dirty = false;
+ for (MachineBasicBlock::RegisterMaskPair P : MBB.liveins()) {
+ MCPhysReg Reg = P.PhysReg;
+ // Set state to live-in. This possibly overrides mappings to virtual
+ // registers but we don't care anymore at this point.
+ setPhysRegState(Reg, regLiveIn);
+ }
- spill(MI, LR.VirtReg, LR.PhysReg, SpillKill);
- if (SpillKill)
- LR.LastUse = nullptr; // Don't kill register again
- }
- killVirtReg(LR);
-}
+ SmallSet<Register, 2> PrologLiveIns;
-/// Spill all dirty virtregs without killing them.
-void RegAllocFast::spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut) {
- if (LiveVirtRegs.empty())
- return;
// The LiveRegMap is keyed by an unsigned (the virtreg number), so the order
// of spilling here is deterministic, if arbitrary.
- for (LiveReg &LR : LiveVirtRegs) {
- if (!LR.PhysReg)
+ MachineBasicBlock::iterator InsertBefore
+ = getMBBBeginInsertionPoint(MBB, PrologLiveIns);
+ for (const LiveReg &LR : LiveVirtRegs) {
+ MCPhysReg PhysReg = LR.PhysReg;
+ if (PhysReg == 0)
continue;
- if (OnlyLiveOut && !mayLiveOut(LR.VirtReg))
+
+ MCRegister FirstUnit = *MCRegUnitIterator(PhysReg, TRI);
+ if (RegUnitStates[FirstUnit] == regLiveIn)
continue;
- spillVirtReg(MI, LR);
+
+ assert((&MBB != &MBB.getParent()->front() || IgnoreMissingDefs) &&
+ "no reload in start block. Missing vreg def?");
+
+ if (PrologLiveIns.count(PhysReg)) {
+ // FIXME: Theoretically this should use an insert point skipping labels
+      // but I'm not sure how labels should interact with prolog instructions
+      // that need reloads.
+ reload(MBB.begin(), LR.VirtReg, PhysReg);
+ } else
+ reload(InsertBefore, LR.VirtReg, PhysReg);
}
LiveVirtRegs.clear();
}
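A toy restatement of reloadAtBegin's marking trick (hypothetical types): stamp every live-in unit with a sentinel, then reload only the assignments whose unit was not stamped, i.e. values the block does not already receive live-in:

    #include <vector>

    enum : unsigned { regFree = 0, regPreAssigned, regLiveIn };

    struct ToyAssignment { unsigned VirtReg, PhysUnit; };

    void toyReloadAtBegin(std::vector<unsigned> &UnitStates,
                          const std::vector<unsigned> &LiveInUnits,
                          const std::vector<ToyAssignment> &Assigned,
                          std::vector<ToyAssignment> &Reloads) {
      for (unsigned U : LiveInUnits)
        UnitStates[U] = regLiveIn; // overrides vreg mappings, as in the hunk
      for (const ToyAssignment &A : Assigned)
        if (UnitStates[A.PhysUnit] != regLiveIn)
          Reloads.push_back(A);
    }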
@@ -451,105 +521,73 @@ void RegAllocFast::spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut) {
/// Handle the direct use of a physical register. Check that the register is
/// not used by a virtreg. Kill the physreg, marking it free. This may add
/// implicit kills to MO->getParent() and invalidate MO.
-void RegAllocFast::usePhysReg(MachineOperand &MO) {
- // Ignore undef uses.
- if (MO.isUndef())
- return;
+bool RegAllocFast::usePhysReg(MachineInstr &MI, MCPhysReg Reg) {
+ assert(Register::isPhysicalRegister(Reg) && "expected physreg");
+ bool displacedAny = displacePhysReg(MI, Reg);
+ setPhysRegState(Reg, regPreAssigned);
+ markRegUsedInInstr(Reg);
+ return displacedAny;
+}
- Register PhysReg = MO.getReg();
- assert(PhysReg.isPhysical() && "Bad usePhysReg operand");
+bool RegAllocFast::definePhysReg(MachineInstr &MI, MCPhysReg Reg) {
+ bool displacedAny = displacePhysReg(MI, Reg);
+ setPhysRegState(Reg, regPreAssigned);
+ return displacedAny;
+}
- markRegUsedInInstr(PhysReg);
- switch (PhysRegState[PhysReg]) {
- case regDisabled:
- break;
- case regReserved:
- PhysRegState[PhysReg] = regFree;
- LLVM_FALLTHROUGH;
- case regFree:
- MO.setIsKill();
- return;
- default:
- // The physreg was allocated to a virtual register. That means the value we
- // wanted has been clobbered.
- llvm_unreachable("Instruction uses an allocated register");
- }
+/// Free all register units of \p PhysReg by displacing any virtual register
+/// or pre-assigned value currently occupying them, inserting reloads for
+/// displaced virtregs. Returns true if anything was displaced.
+bool RegAllocFast::displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg) {
+ bool displacedAny = false;
- // Maybe a superregister is reserved?
- for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
- MCPhysReg Alias = *AI;
- switch (PhysRegState[Alias]) {
- case regDisabled:
+ for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
+ unsigned Unit = *UI;
+ switch (unsigned VirtReg = RegUnitStates[Unit]) {
+ default: {
+ LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
+ assert(LRI != LiveVirtRegs.end() && "datastructures in sync");
+ MachineBasicBlock::iterator ReloadBefore =
+ std::next((MachineBasicBlock::iterator)MI.getIterator());
+ reload(ReloadBefore, VirtReg, LRI->PhysReg);
+
+ setPhysRegState(LRI->PhysReg, regFree);
+ LRI->PhysReg = 0;
+ LRI->Reloaded = true;
+ displacedAny = true;
+ break;
+ }
+ case regPreAssigned:
+ RegUnitStates[Unit] = regFree;
+ displacedAny = true;
break;
- case regReserved:
- // Either PhysReg is a subregister of Alias and we mark the
- // whole register as free, or PhysReg is the superregister of
- // Alias and we mark all the aliases as disabled before freeing
- // PhysReg.
- // In the latter case, since PhysReg was disabled, this means that
- // its value is defined only by physical sub-registers. This check
- // is performed by the assert of the default case in this loop.
- // Note: The value of the superregister may only be partial
- // defined, that is why regDisabled is a valid state for aliases.
- assert((TRI->isSuperRegister(PhysReg, Alias) ||
- TRI->isSuperRegister(Alias, PhysReg)) &&
- "Instruction is not using a subregister of a reserved register");
- LLVM_FALLTHROUGH;
case regFree:
- if (TRI->isSuperRegister(PhysReg, Alias)) {
- // Leave the superregister in the working set.
- setPhysRegState(Alias, regFree);
- MO.getParent()->addRegisterKilled(Alias, TRI, true);
- return;
- }
- // Some other alias was in the working set - clear it.
- setPhysRegState(Alias, regDisabled);
break;
- default:
- llvm_unreachable("Instruction uses an alias of an allocated register");
}
}
-
- // All aliases are disabled, bring register into working set.
- setPhysRegState(PhysReg, regFree);
- MO.setIsKill();
+ return displacedAny;
}
-/// Mark PhysReg as reserved or free after spilling any virtregs. This is very
-/// similar to defineVirtReg except the physreg is reserved instead of
-/// allocated.
-void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI,
- MCPhysReg PhysReg, RegState NewState) {
- markRegUsedInInstr(PhysReg);
- switch (Register VirtReg = PhysRegState[PhysReg]) {
- case regDisabled:
- break;
- default:
- spillVirtReg(MI, VirtReg);
- LLVM_FALLTHROUGH;
+void RegAllocFast::freePhysReg(MCPhysReg PhysReg) {
+ LLVM_DEBUG(dbgs() << "Freeing " << printReg(PhysReg, TRI) << ':');
+
+ MCRegister FirstUnit = *MCRegUnitIterator(PhysReg, TRI);
+ switch (unsigned VirtReg = RegUnitStates[FirstUnit]) {
case regFree:
- case regReserved:
- setPhysRegState(PhysReg, NewState);
+ LLVM_DEBUG(dbgs() << '\n');
return;
- }
-
- // This is a disabled register, disable all aliases.
- setPhysRegState(PhysReg, NewState);
- for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
- MCPhysReg Alias = *AI;
- switch (Register VirtReg = PhysRegState[Alias]) {
- case regDisabled:
- break;
- default:
- spillVirtReg(MI, VirtReg);
- LLVM_FALLTHROUGH;
- case regFree:
- case regReserved:
- setPhysRegState(Alias, regDisabled);
- if (TRI->isSuperRegister(PhysReg, Alias))
- return;
- break;
+ case regPreAssigned:
+ LLVM_DEBUG(dbgs() << '\n');
+ setPhysRegState(PhysReg, regFree);
+ return;
+ default: {
+ LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
+ assert(LRI != LiveVirtRegs.end());
+ LLVM_DEBUG(dbgs() << ' ' << printReg(LRI->VirtReg, TRI) << '\n');
+ setPhysRegState(LRI->PhysReg, regFree);
+ LRI->PhysReg = 0;
}
+ return;
}
}
@@ -558,57 +596,61 @@ void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI,
/// disabled - it can be allocated directly.
/// \returns spillImpossible when PhysReg or an alias can't be spilled.
unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
- if (isRegUsedInInstr(PhysReg)) {
- LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI)
- << " is already used in instr.\n");
- return spillImpossible;
- }
- switch (Register VirtReg = PhysRegState[PhysReg]) {
- case regDisabled:
- break;
- case regFree:
- return 0;
- case regReserved:
- LLVM_DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding "
- << printReg(PhysReg, TRI) << " is reserved already.\n");
- return spillImpossible;
- default: {
- LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg);
- assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
- "Missing VirtReg entry");
- return LRI->Dirty ? spillDirty : spillClean;
- }
- }
-
- // This is a disabled register, add up cost of aliases.
- LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is disabled.\n");
- unsigned Cost = 0;
- for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
- MCPhysReg Alias = *AI;
- switch (Register VirtReg = PhysRegState[Alias]) {
- case regDisabled:
- break;
+ for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
+ switch (unsigned VirtReg = RegUnitStates[*UI]) {
case regFree:
- ++Cost;
break;
- case regReserved:
+ case regPreAssigned:
+ LLVM_DEBUG(dbgs() << "Cannot spill pre-assigned "
+ << printReg(PhysReg, TRI) << '\n');
return spillImpossible;
default: {
- LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg);
- assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
- "Missing VirtReg entry");
- Cost += LRI->Dirty ? spillDirty : spillClean;
- break;
+ bool SureSpill = StackSlotForVirtReg[VirtReg] != -1 ||
+ findLiveVirtReg(VirtReg)->LiveOut;
+ return SureSpill ? spillClean : spillDirty;
}
}
}
- return Cost;
+ return 0;
+}
+
+void RegAllocFast::assignDanglingDebugValues(MachineInstr &Definition,
+ Register VirtReg, MCPhysReg Reg) {
+ auto UDBGValIter = DanglingDbgValues.find(VirtReg);
+ if (UDBGValIter == DanglingDbgValues.end())
+ return;
+
+ SmallVectorImpl<MachineInstr*> &Dangling = UDBGValIter->second;
+ for (MachineInstr *DbgValue : Dangling) {
+ assert(DbgValue->isDebugValue());
+ MachineOperand &MO = DbgValue->getOperand(0);
+ if (!MO.isReg())
+ continue;
+
+ // Test whether the physreg survives from the definition to the DBG_VALUE.
+ MCPhysReg SetToReg = Reg;
+ unsigned Limit = 20;
+ for (MachineBasicBlock::iterator I = std::next(Definition.getIterator()),
+ E = DbgValue->getIterator(); I != E; ++I) {
+ if (I->modifiesRegister(Reg, TRI) || --Limit == 0) {
+ LLVM_DEBUG(dbgs() << "Register did not survive for " << *DbgValue
+ << '\n');
+ SetToReg = 0;
+ break;
+ }
+ }
+ MO.setReg(SetToReg);
+ if (SetToReg != 0)
+ MO.setIsRenamable();
+ }
+ Dangling.clear();
}
/// This method updates local state so that we know that PhysReg is the
/// proper container for VirtReg now. The physical register must not be used
/// for anything else when this is called.
-void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) {
+void RegAllocFast::assignVirtToPhysReg(MachineInstr &AtMI, LiveReg &LR,
+ MCPhysReg PhysReg) {
Register VirtReg = LR.VirtReg;
LLVM_DEBUG(dbgs() << "Assigning " << printReg(VirtReg, TRI) << " to "
<< printReg(PhysReg, TRI) << '\n');
@@ -616,6 +658,8 @@ void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) {
assert(PhysReg != 0 && "Trying to assign no register");
LR.PhysReg = PhysReg;
setPhysRegState(PhysReg, VirtReg);
+
+ assignDanglingDebugValues(AtMI, VirtReg, PhysReg);
}
static bool isCoalescable(const MachineInstr &MI) {
@@ -659,11 +703,10 @@ Register RegAllocFast::traceCopies(Register VirtReg) const {
}
/// Allocates a physical register for VirtReg.
-void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0) {
+void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR,
+ Register Hint0, bool LookAtPhysRegUses) {
const Register VirtReg = LR.VirtReg;
-
- assert(Register::isVirtualRegister(VirtReg) &&
- "Can only allocate virtual registers");
+ assert(LR.PhysReg == 0);
const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
LLVM_DEBUG(dbgs() << "Search register for " << printReg(VirtReg)
@@ -671,41 +714,36 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0) {
<< " with hint " << printReg(Hint0, TRI) << '\n');
// Take hint when possible.
- if (Hint0.isPhysical() && MRI->isAllocatable(Hint0) &&
- RC.contains(Hint0)) {
- // Ignore the hint if we would have to spill a dirty register.
- unsigned Cost = calcSpillCost(Hint0);
- if (Cost < spillDirty) {
+ if (Hint0.isPhysical() && MRI->isAllocatable(Hint0) && RC.contains(Hint0) &&
+ !isRegUsedInInstr(Hint0, LookAtPhysRegUses)) {
+ // Take hint if the register is currently free.
+ if (isPhysRegFree(Hint0)) {
LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint0, TRI)
<< '\n');
- if (Cost)
- definePhysReg(MI, Hint0, regFree);
- assignVirtToPhysReg(LR, Hint0);
+ assignVirtToPhysReg(MI, LR, Hint0);
return;
} else {
- LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint0, TRI)
- << "occupied\n");
+ LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint0, TRI)
+ << " occupied\n");
}
} else {
Hint0 = Register();
}
+
// Try other hint.
Register Hint1 = traceCopies(VirtReg);
- if (Hint1.isPhysical() && MRI->isAllocatable(Hint1) &&
- RC.contains(Hint1) && !isRegUsedInInstr(Hint1)) {
- // Ignore the hint if we would have to spill a dirty register.
- unsigned Cost = calcSpillCost(Hint1);
- if (Cost < spillDirty) {
+ if (Hint1.isPhysical() && MRI->isAllocatable(Hint1) && RC.contains(Hint1) &&
+ !isRegUsedInInstr(Hint1, LookAtPhysRegUses)) {
+ // Take hint if the register is currently free.
+ if (isPhysRegFree(Hint1)) {
LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint1, TRI)
- << '\n');
- if (Cost)
- definePhysReg(MI, Hint1, regFree);
- assignVirtToPhysReg(LR, Hint1);
+ << '\n');
+ assignVirtToPhysReg(MI, LR, Hint1);
return;
} else {
- LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint1, TRI)
- << "occupied\n");
+ LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint1, TRI)
+ << " occupied\n");
}
} else {
Hint1 = Register();
@@ -716,15 +754,20 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0) {
ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC);
for (MCPhysReg PhysReg : AllocationOrder) {
LLVM_DEBUG(dbgs() << "\tRegister: " << printReg(PhysReg, TRI) << ' ');
+ if (isRegUsedInInstr(PhysReg, LookAtPhysRegUses)) {
+ LLVM_DEBUG(dbgs() << "already used in instr.\n");
+ continue;
+ }
+
unsigned Cost = calcSpillCost(PhysReg);
LLVM_DEBUG(dbgs() << "Cost: " << Cost << " BestCost: " << BestCost << '\n');
    // Immediately take a register with cost 0.
if (Cost == 0) {
- assignVirtToPhysReg(LR, PhysReg);
+ assignVirtToPhysReg(MI, LR, PhysReg);
return;
}
- if (PhysReg == Hint1 || PhysReg == Hint0)
+ if (PhysReg == Hint0 || PhysReg == Hint1)
Cost -= spillPrefBonus;
if (Cost < BestCost) {
@@ -740,13 +783,14 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0) {
MI.emitError("inline assembly requires more registers than available");
else
MI.emitError("ran out of registers during register allocation");
- definePhysReg(MI, *AllocationOrder.begin(), regFree);
- assignVirtToPhysReg(LR, *AllocationOrder.begin());
+
+ LR.Error = true;
+ LR.PhysReg = 0;
return;
}
- definePhysReg(MI, BestReg, regFree);
- assignVirtToPhysReg(LR, BestReg);
+ displacePhysReg(MI, BestReg);
+ assignVirtToPhysReg(MI, LR, BestReg);
}
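A toy outline of the allocation policy this function now implements (hypothetical names): a free hint wins outright; otherwise scan the class order for the cheapest displacement, discounting hinted registers by a preference bonus:

    #include <optional>
    #include <vector>

    std::optional<unsigned> toyPickReg(const std::vector<unsigned> &Order,
                                       const std::vector<unsigned> &Cost,
                                       unsigned HintReg, unsigned PrefBonus,
                                       unsigned Impossible) {
      std::optional<unsigned> Best;
      unsigned BestCost = Impossible;
      for (size_t I = 0; I != Order.size(); ++I) {
        unsigned C = Cost[I];
        if (C == 0)
          return Order[I]; // immediately take a free register
        if (Order[I] == HintReg)
          C = C > PrefBonus ? C - PrefBonus : 0;
        if (C < BestCost) {
          BestCost = C;
          Best = Order[I];
        }
      }
      return Best; // empty when every candidate is impossible to spill
    }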
void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) {
@@ -774,347 +818,491 @@ void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) {
MO.setIsRenamable(true);
}
-/// Allocates a register for VirtReg and mark it as dirty.
-MCPhysReg RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
- Register VirtReg, Register Hint) {
- assert(Register::isVirtualRegister(VirtReg) && "Not a virtual register");
+/// Variation of defineVirtReg() with special handling for livethrough regs
+/// (tied or earlyclobber) that may interfere with preassigned uses.
+void RegAllocFast::defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum,
+ Register VirtReg) {
+ LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
+ if (LRI != LiveVirtRegs.end()) {
+ MCPhysReg PrevReg = LRI->PhysReg;
+ if (PrevReg != 0 && isRegUsedInInstr(PrevReg, true)) {
+ LLVM_DEBUG(dbgs() << "Need new assignment for " << printReg(PrevReg, TRI)
+ << " (tied/earlyclobber resolution)\n");
+ freePhysReg(PrevReg);
+ LRI->PhysReg = 0;
+ allocVirtReg(MI, *LRI, 0, true);
+ MachineBasicBlock::iterator InsertBefore =
+ std::next((MachineBasicBlock::iterator)MI.getIterator());
+ LLVM_DEBUG(dbgs() << "Copy " << printReg(LRI->PhysReg, TRI) << " to "
+ << printReg(PrevReg, TRI) << '\n');
+ BuildMI(*MBB, InsertBefore, MI.getDebugLoc(),
+ TII->get(TargetOpcode::COPY), PrevReg)
+ .addReg(LRI->PhysReg, llvm::RegState::Kill);
+ }
+ MachineOperand &MO = MI.getOperand(OpNum);
+ if (MO.getSubReg() && !MO.isUndef()) {
+ LRI->LastUse = &MI;
+ }
+ }
+ return defineVirtReg(MI, OpNum, VirtReg, true);
+}
+
+/// Allocates a register for VirtReg definition. Typically the register is
+/// already assigned from a use of the virtreg, however we still need to
+/// perform an allocation if:
+/// - It is a dead definition without any uses.
+/// - The value is live out and all uses are in different basic blocks.
+void RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
+ Register VirtReg, bool LookAtPhysRegUses) {
+ assert(VirtReg.isVirtual() && "Not a virtual register");
+ MachineOperand &MO = MI.getOperand(OpNum);
LiveRegMap::iterator LRI;
bool New;
std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
- if (!LRI->PhysReg) {
- // If there is no hint, peek at the only use of this register.
- if ((!Hint || !Hint.isPhysical()) &&
- MRI->hasOneNonDBGUse(VirtReg)) {
- const MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(VirtReg);
- // It's a copy, use the destination register as a hint.
- if (UseMI.isCopyLike())
- Hint = UseMI.getOperand(0).getReg();
+ if (New) {
+ if (!MO.isDead()) {
+ if (mayLiveOut(VirtReg)) {
+ LRI->LiveOut = true;
+ } else {
+ // It is a dead def without the dead flag; add the flag now.
+ MO.setIsDead(true);
+ }
}
- allocVirtReg(MI, *LRI, Hint);
- } else if (LRI->LastUse) {
- // Redefining a live register - kill at the last use, unless it is this
- // instruction defining VirtReg multiple times.
- if (LRI->LastUse != &MI || LRI->LastUse->getOperand(LRI->LastOpNum).isUse())
- addKillFlag(*LRI);
}
- assert(LRI->PhysReg && "Register not assigned");
- LRI->LastUse = &MI;
- LRI->LastOpNum = OpNum;
- LRI->Dirty = true;
- markRegUsedInInstr(LRI->PhysReg);
- return LRI->PhysReg;
+ if (LRI->PhysReg == 0)
+ allocVirtReg(MI, *LRI, 0, LookAtPhysRegUses);
+ else {
+ assert(!isRegUsedInInstr(LRI->PhysReg, LookAtPhysRegUses) &&
+ "TODO: preassign mismatch");
+ LLVM_DEBUG(dbgs() << "In def of " << printReg(VirtReg, TRI)
+ << " use existing assignment to "
+ << printReg(LRI->PhysReg, TRI) << '\n');
+ }
+
+ MCPhysReg PhysReg = LRI->PhysReg;
+ assert(PhysReg != 0 && "Register not assigned");
+ if (LRI->Reloaded || LRI->LiveOut) {
+ if (!MI.isImplicitDef()) {
+ MachineBasicBlock::iterator SpillBefore =
+ std::next((MachineBasicBlock::iterator)MI.getIterator());
+ LLVM_DEBUG(dbgs() << "Spill Reason: LO: " << LRI->LiveOut << " RL: "
+ << LRI->Reloaded << '\n');
+ bool Kill = LRI->LastUse == nullptr;
+ spill(SpillBefore, VirtReg, PhysReg, Kill, LRI->LiveOut);
+ LRI->LastUse = nullptr;
+ }
+ LRI->LiveOut = false;
+ LRI->Reloaded = false;
+ }
+ if (MI.getOpcode() == TargetOpcode::BUNDLE) {
+ BundleVirtRegsMap[VirtReg] = PhysReg;
+ }
+ markRegUsedInInstr(PhysReg);
+ setPhysReg(MI, MO, PhysReg);
}
-/// Make sure VirtReg is available in a physreg and return it.
-RegAllocFast::LiveReg &RegAllocFast::reloadVirtReg(MachineInstr &MI,
- unsigned OpNum,
- Register VirtReg,
- Register Hint) {
- assert(Register::isVirtualRegister(VirtReg) && "Not a virtual register");
+/// Allocates a register for a VirtReg use.
+void RegAllocFast::useVirtReg(MachineInstr &MI, unsigned OpNum,
+ Register VirtReg) {
+ assert(VirtReg.isVirtual() && "Not a virtual register");
+ MachineOperand &MO = MI.getOperand(OpNum);
LiveRegMap::iterator LRI;
bool New;
std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
- MachineOperand &MO = MI.getOperand(OpNum);
- if (!LRI->PhysReg) {
- allocVirtReg(MI, *LRI, Hint);
- reload(MI, VirtReg, LRI->PhysReg);
- } else if (LRI->Dirty) {
- if (isLastUseOfLocalReg(MO)) {
- LLVM_DEBUG(dbgs() << "Killing last use: " << MO << '\n');
- if (MO.isUse())
- MO.setIsKill();
- else
- MO.setIsDead();
- } else if (MO.isKill()) {
- LLVM_DEBUG(dbgs() << "Clearing dubious kill: " << MO << '\n');
- MO.setIsKill(false);
- } else if (MO.isDead()) {
- LLVM_DEBUG(dbgs() << "Clearing dubious dead: " << MO << '\n');
- MO.setIsDead(false);
+ if (New) {
+ MachineOperand &MO = MI.getOperand(OpNum);
+ if (!MO.isKill()) {
+ if (mayLiveOut(VirtReg)) {
+ LRI->LiveOut = true;
+ } else {
+ // It is a last (killing) use without the kill flag; add the flag now.
+ MO.setIsKill(true);
+ }
}
- } else if (MO.isKill()) {
- // We must remove kill flags from uses of reloaded registers because the
- // register would be killed immediately, and there might be a second use:
- // %foo = OR killed %x, %x
- // This would cause a second reload of %x into a different register.
- LLVM_DEBUG(dbgs() << "Clearing clean kill: " << MO << '\n');
- MO.setIsKill(false);
- } else if (MO.isDead()) {
- LLVM_DEBUG(dbgs() << "Clearing clean dead: " << MO << '\n');
- MO.setIsDead(false);
+ } else {
+ assert((!MO.isKill() || LRI->LastUse == &MI) && "Invalid kill flag");
}
- assert(LRI->PhysReg && "Register not assigned");
+
+ // If necessary allocate a register.
+ if (LRI->PhysReg == 0) {
+ assert(!MO.isTied() && "tied op should be allocated");
+ Register Hint;
+ if (MI.isCopy() && MI.getOperand(1).getSubReg() == 0) {
+ Hint = MI.getOperand(0).getReg();
+ assert(Hint.isPhysical() &&
+ "Copy destination should already be assigned");
+ }
+ allocVirtReg(MI, *LRI, Hint, false);
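+ // If allocation failed (LRI->Error), fall back to the first register of
+ // the class's allocation order so compilation can proceed.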
+ if (LRI->Error) {
+ const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+ ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC);
+ setPhysReg(MI, MO, *AllocationOrder.begin());
+ return;
+ }
+ }
+
LRI->LastUse = &MI;
- LRI->LastOpNum = OpNum;
+
+ if (MI.getOpcode() == TargetOpcode::BUNDLE) {
+ BundleVirtRegsMap[VirtReg] = LRI->PhysReg;
+ }
markRegUsedInInstr(LRI->PhysReg);
- return *LRI;
+ setPhysReg(MI, MO, LRI->PhysReg);
}
/// Changes operand OpNum in MI to refer to PhysReg, considering subregs.
/// This may invalidate any operand pointers.
-bool RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO,
+void RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO,
MCPhysReg PhysReg) {
- bool Dead = MO.isDead();
if (!MO.getSubReg()) {
MO.setReg(PhysReg);
MO.setIsRenamable(true);
- return MO.isKill() || Dead;
+ return;
}
// Handle subregister index.
- MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : Register());
+ MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : MCRegister());
MO.setIsRenamable(true);
- MO.setSubReg(0);
+ // Note: We leave the subreg number around a little longer in case of defs.
+ // This is so that the register freeing logic in allocateInstruction can
+ // still recognize these as subregister defs. The code there will clear the
+ // number.
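+ // E.g. after rewriting (illustrative MIR)
+ //   %v.sub_16:gr32 = ...
+ // the operand keeps its sub_16 index for now so the freeing loop can tell
+ // that only part of the physical register is redefined.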
+ if (!MO.isDef())
+ MO.setSubReg(0);
// A kill flag implies killing the full register. Add corresponding super
// register kill.
if (MO.isKill()) {
MI.addRegisterKilled(PhysReg, TRI, true);
- return true;
+ return;
}
// A <def,read-undef> of a sub-register requires an implicit def of the full
// register.
- if (MO.isDef() && MO.isUndef())
- MI.addRegisterDefined(PhysReg, TRI);
-
- return Dead;
-}
-
-// Handles special instruction operand like early clobbers and tied ops when
-// there are additional physreg defines.
-void RegAllocFast::handleThroughOperands(MachineInstr &MI,
- SmallVectorImpl<Register> &VirtDead) {
- LLVM_DEBUG(dbgs() << "Scanning for through registers:");
- SmallSet<Register, 8> ThroughRegs;
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg()) continue;
- Register Reg = MO.getReg();
- if (!Reg.isVirtual())
- continue;
- if (MO.isEarlyClobber() || (MO.isUse() && MO.isTied()) ||
- (MO.getSubReg() && MI.readsVirtualRegister(Reg))) {
- if (ThroughRegs.insert(Reg).second)
- LLVM_DEBUG(dbgs() << ' ' << printReg(Reg));
- }
- }
-
- // If any physreg defines collide with preallocated through registers,
- // we must spill and reallocate.
- LLVM_DEBUG(dbgs() << "\nChecking for physdef collisions.\n");
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg() || !MO.isDef()) continue;
- Register Reg = MO.getReg();
- if (!Reg || !Reg.isPhysical())
- continue;
- markRegUsedInInstr(Reg);
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
- if (ThroughRegs.count(PhysRegState[*AI]))
- definePhysReg(MI, *AI, regFree);
- }
- }
-
- SmallVector<Register, 8> PartialDefs;
- LLVM_DEBUG(dbgs() << "Allocating tied uses.\n");
- for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
- MachineOperand &MO = MI.getOperand(I);
- if (!MO.isReg()) continue;
- Register Reg = MO.getReg();
- if (!Register::isVirtualRegister(Reg))
- continue;
- if (MO.isUse()) {
- if (!MO.isTied()) continue;
- LLVM_DEBUG(dbgs() << "Operand " << I << "(" << MO
- << ") is tied to operand " << MI.findTiedOperandIdx(I)
- << ".\n");
- LiveReg &LR = reloadVirtReg(MI, I, Reg, 0);
- MCPhysReg PhysReg = LR.PhysReg;
- setPhysReg(MI, MO, PhysReg);
- // Note: we don't update the def operand yet. That would cause the normal
- // def-scan to attempt spilling.
- } else if (MO.getSubReg() && MI.readsVirtualRegister(Reg)) {
- LLVM_DEBUG(dbgs() << "Partial redefine: " << MO << '\n');
- // Reload the register, but don't assign to the operand just yet.
- // That would confuse the later phys-def processing pass.
- LiveReg &LR = reloadVirtReg(MI, I, Reg, 0);
- PartialDefs.push_back(LR.PhysReg);
- }
- }
-
- LLVM_DEBUG(dbgs() << "Allocating early clobbers.\n");
- for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
- const MachineOperand &MO = MI.getOperand(I);
- if (!MO.isReg()) continue;
- Register Reg = MO.getReg();
- if (!Register::isVirtualRegister(Reg))
- continue;
- if (!MO.isEarlyClobber())
- continue;
- // Note: defineVirtReg may invalidate MO.
- MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, 0);
- if (setPhysReg(MI, MI.getOperand(I), PhysReg))
- VirtDead.push_back(Reg);
- }
-
- // Restore UsedInInstr to a state usable for allocating normal virtual uses.
- UsedInInstr.clear();
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue;
- Register Reg = MO.getReg();
- if (!Reg || !Reg.isPhysical())
- continue;
- LLVM_DEBUG(dbgs() << "\tSetting " << printReg(Reg, TRI)
- << " as used in instr\n");
- markRegUsedInInstr(Reg);
+ if (MO.isDef() && MO.isUndef()) {
+ if (MO.isDead())
+ MI.addRegisterDead(PhysReg, TRI, true);
+ else
+ MI.addRegisterDefined(PhysReg, TRI);
}
-
- // Also mark PartialDefs as used to avoid reallocation.
- for (Register PartialDef : PartialDefs)
- markRegUsedInInstr(PartialDef);
}
#ifndef NDEBUG
-void RegAllocFast::dumpState() {
- for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) {
- if (PhysRegState[Reg] == regDisabled) continue;
- dbgs() << " " << printReg(Reg, TRI);
- switch(PhysRegState[Reg]) {
+
+void RegAllocFast::dumpState() const {
+ for (unsigned Unit = 1, UnitE = TRI->getNumRegUnits(); Unit != UnitE;
+ ++Unit) {
+ switch (unsigned VirtReg = RegUnitStates[Unit]) {
case regFree:
break;
- case regReserved:
- dbgs() << "*";
+ case regPreAssigned:
+ dbgs() << " " << printRegUnit(Unit, TRI) << "[P]";
break;
+ case regLiveIn:
+ llvm_unreachable("Should not have regLiveIn in map");
default: {
- dbgs() << '=' << printReg(PhysRegState[Reg]);
- LiveRegMap::iterator LRI = findLiveVirtReg(PhysRegState[Reg]);
- assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
- "Missing VirtReg entry");
- if (LRI->Dirty)
- dbgs() << "*";
- assert(LRI->PhysReg == Reg && "Bad inverse map");
+ dbgs() << ' ' << printRegUnit(Unit, TRI) << '=' << printReg(VirtReg);
+ LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
+ assert(I != LiveVirtRegs.end() && "have LiveVirtRegs entry");
+ if (I->LiveOut || I->Reloaded) {
+ dbgs() << '[';
+ if (I->LiveOut) dbgs() << 'O';
+ if (I->Reloaded) dbgs() << 'R';
+ dbgs() << ']';
+ }
+ assert(TRI->hasRegUnit(I->PhysReg, Unit) && "inverse mapping present");
break;
}
}
}
dbgs() << '\n';
// Check that LiveVirtRegs is the inverse.
- for (LiveRegMap::iterator i = LiveVirtRegs.begin(),
- e = LiveVirtRegs.end(); i != e; ++i) {
- if (!i->PhysReg)
- continue;
- assert(i->VirtReg.isVirtual() && "Bad map key");
- assert(Register::isPhysicalRegister(i->PhysReg) && "Bad map value");
- assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map");
+ for (const LiveReg &LR : LiveVirtRegs) {
+ Register VirtReg = LR.VirtReg;
+ assert(VirtReg.isVirtual() && "Bad map key");
+ MCPhysReg PhysReg = LR.PhysReg;
+ if (PhysReg != 0) {
+ assert(Register::isPhysicalRegister(PhysReg) &&
+ "mapped to physreg");
+ for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
+ assert(RegUnitStates[*UI] == VirtReg && "inverse map valid");
+ }
+ }
}
}
#endif
-void RegAllocFast::allocateInstruction(MachineInstr &MI) {
- const MCInstrDesc &MCID = MI.getDesc();
-
- // If this is a copy, we may be able to coalesce.
- Register CopySrcReg;
- Register CopyDstReg;
- unsigned CopySrcSub = 0;
- unsigned CopyDstSub = 0;
- if (MI.isCopy()) {
- CopyDstReg = MI.getOperand(0).getReg();
- CopySrcReg = MI.getOperand(1).getReg();
- CopyDstSub = MI.getOperand(0).getSubReg();
- CopySrcSub = MI.getOperand(1).getSubReg();
+/// Count number of defs consumed from each register class by \p Reg
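+/// (E.g. a def of a GR32-class vreg increments the count for GR32 and for
+/// each of its subclasses, since the register eventually chosen may also
+/// belong to one of those smaller classes; class names are illustrative.)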
+void RegAllocFast::addRegClassDefCounts(std::vector<unsigned> &RegClassDefCounts,
+ Register Reg) const {
+ assert(RegClassDefCounts.size() == TRI->getNumRegClasses());
+
+ if (Reg.isVirtual()) {
+ const TargetRegisterClass *OpRC = MRI->getRegClass(Reg);
+ for (unsigned RCIdx = 0, RCIdxEnd = TRI->getNumRegClasses();
+ RCIdx != RCIdxEnd; ++RCIdx) {
+ const TargetRegisterClass *IdxRC = TRI->getRegClass(RCIdx);
+ // FIXME: Consider aliasing sub/super registers.
+ if (OpRC->hasSubClassEq(IdxRC))
+ ++RegClassDefCounts[RCIdx];
+ }
+
+ return;
}
- // Track registers used by instruction.
- UsedInInstr.clear();
+ for (unsigned RCIdx = 0, RCIdxEnd = TRI->getNumRegClasses();
+ RCIdx != RCIdxEnd; ++RCIdx) {
+ const TargetRegisterClass *IdxRC = TRI->getRegClass(RCIdx);
+ for (MCRegAliasIterator Alias(Reg, TRI, true); Alias.isValid(); ++Alias) {
+ if (IdxRC->contains(*Alias)) {
+ ++RegClassDefCounts[RCIdx];
+ break;
+ }
+ }
+ }
+}
- // First scan.
- // Mark physreg uses and early clobbers as used.
- // Find the end of the virtreg operands
- unsigned VirtOpEnd = 0;
- bool hasTiedOps = false;
- bool hasEarlyClobbers = false;
- bool hasPartialRedefs = false;
- bool hasPhysDefs = false;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- // Make sure MRI knows about registers clobbered by regmasks.
- if (MO.isRegMask()) {
- MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
- continue;
+void RegAllocFast::allocateInstruction(MachineInstr &MI) {
+ // The basic algorithm here is:
+ // 1. Mark registers of def operands as free
+ // 2. Allocate registers to use operands and place reload instructions for
+ // registers displaced by the allocation.
+ //
+ // However, we need to handle some corner cases:
+ // - pre-assigned defs and uses need to be handled before the other def/use
+ // operands are processed to avoid the allocation heuristics clashing with
+ // the pre-assignment.
+ // - The "free def operands" step has to come last instead of first for tied
+ // operands and early-clobbers.
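+ //
+ // E.g. walking a block bottom-up (illustrative MIR):
+ //   %a = LOAD ...
+ //   STORE killed %a, ...
+ // the STORE is processed first and assigns a physreg to the use of %a;
+ // when the LOAD is reached, its def reuses that assignment and then frees
+ // the register for the code above it.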
+
+ UsedInInstr.clear();
+ BundleVirtRegsMap.clear();
+
+ // Scan for special cases; Apply pre-assigned register defs to state.
+ bool HasPhysRegUse = false;
+ bool HasRegMask = false;
+ bool HasVRegDef = false;
+ bool HasDef = false;
+ bool HasEarlyClobber = false;
+ bool NeedToAssignLiveThroughs = false;
+ for (MachineOperand &MO : MI.operands()) {
+ if (MO.isReg()) {
+ Register Reg = MO.getReg();
+ if (Reg.isVirtual()) {
+ if (MO.isDef()) {
+ HasDef = true;
+ HasVRegDef = true;
+ if (MO.isEarlyClobber()) {
+ HasEarlyClobber = true;
+ NeedToAssignLiveThroughs = true;
+ }
+ if (MO.isTied() || (MO.getSubReg() != 0 && !MO.isUndef()))
+ NeedToAssignLiveThroughs = true;
+ }
+ } else if (Reg.isPhysical()) {
+ if (!MRI->isReserved(Reg)) {
+ if (MO.isDef()) {
+ HasDef = true;
+ bool displacedAny = definePhysReg(MI, Reg);
+ if (MO.isEarlyClobber())
+ HasEarlyClobber = true;
+ if (!displacedAny)
+ MO.setIsDead(true);
+ }
+ if (MO.readsReg())
+ HasPhysRegUse = true;
+ }
+ }
+ } else if (MO.isRegMask()) {
+ HasRegMask = true;
}
- if (!MO.isReg()) continue;
- Register Reg = MO.getReg();
- if (!Reg) continue;
- if (Register::isVirtualRegister(Reg)) {
- VirtOpEnd = i+1;
- if (MO.isUse()) {
- hasTiedOps = hasTiedOps ||
- MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1;
+ }
+
+ // Allocate virtreg defs.
+ if (HasDef) {
+ if (HasVRegDef) {
+ // Special handling for early clobbers, tied operands or subregister defs:
+ // Compared to "normal" defs these:
+ // - Must not use a register that is pre-assigned for a use operand.
+ // - In order to solve tricky inline assembly constraints we change the
+ // heuristic to figure out a good operand order before doing
+ // assignments.
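+ // E.g. (sketch of an inline asm):
+ //   early-clobber %ec:gr32 = INLINEASM ..., %use:gr32
+ // %ec is written before %use is read, so it must not be assigned the same
+ // register as %use even though one operand is a def and the other a use.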
+ if (NeedToAssignLiveThroughs) {
+ DefOperandIndexes.clear();
+ PhysRegUses.clear();
+
+ // Track number of defs which may consume a register from the class.
+ std::vector<unsigned> RegClassDefCounts(TRI->getNumRegClasses(), 0);
+ assert(RegClassDefCounts[0] == 0);
+
+ LLVM_DEBUG(dbgs() << "Need to assign livethroughs\n");
+ for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (MO.readsReg()) {
+ if (Reg.isPhysical()) {
+ LLVM_DEBUG(dbgs() << "mark extra used: " << printReg(Reg, TRI)
+ << '\n');
+ markPhysRegUsedInInstr(Reg);
+ }
+ }
+
+ if (MO.isDef()) {
+ if (Reg.isVirtual())
+ DefOperandIndexes.push_back(I);
+
+ addRegClassDefCounts(RegClassDefCounts, Reg);
+ }
+ }
+
+ llvm::sort(DefOperandIndexes, [&](uint16_t I0, uint16_t I1) {
+ const MachineOperand &MO0 = MI.getOperand(I0);
+ const MachineOperand &MO1 = MI.getOperand(I1);
+ Register Reg0 = MO0.getReg();
+ Register Reg1 = MO1.getReg();
+ const TargetRegisterClass &RC0 = *MRI->getRegClass(Reg0);
+ const TargetRegisterClass &RC1 = *MRI->getRegClass(Reg1);
+
+ // Identify register classes that are easy to use up completely just in
+ // this instruction.
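+ // E.g. a class with two allocatable registers facing three defs in this
+ // instruction counts as "small" and has its defs assigned first.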
+ unsigned ClassSize0 = RegClassInfo.getOrder(&RC0).size();
+ unsigned ClassSize1 = RegClassInfo.getOrder(&RC1).size();
+
+ bool SmallClass0 = ClassSize0 < RegClassDefCounts[RC0.getID()];
+ bool SmallClass1 = ClassSize1 < RegClassDefCounts[RC1.getID()];
+ if (SmallClass0 > SmallClass1)
+ return true;
+ if (SmallClass0 < SmallClass1)
+ return false;
+
+ // Allocate early clobbers and livethrough operands first.
+ bool Livethrough0 = MO0.isEarlyClobber() || MO0.isTied() ||
+ (MO0.getSubReg() == 0 && !MO0.isUndef());
+ bool Livethrough1 = MO1.isEarlyClobber() || MO1.isTied() ||
+ (MO1.getSubReg() == 0 && !MO1.isUndef());
+ if (Livethrough0 > Livethrough1)
+ return true;
+ if (Livethrough0 < Livethrough1)
+ return false;
+
+ // Tie-break rule: operand index.
+ return I0 < I1;
+ });
+
+ for (uint16_t OpIdx : DefOperandIndexes) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ LLVM_DEBUG(dbgs() << "Allocating " << MO << '\n');
+ unsigned Reg = MO.getReg();
+ if (MO.isEarlyClobber() || MO.isTied() ||
+ (MO.getSubReg() && !MO.isUndef())) {
+ defineLiveThroughVirtReg(MI, OpIdx, Reg);
+ } else {
+ defineVirtReg(MI, OpIdx, Reg);
+ }
+ }
} else {
- if (MO.isEarlyClobber())
- hasEarlyClobbers = true;
- if (MO.getSubReg() && MI.readsVirtualRegister(Reg))
- hasPartialRedefs = true;
+ // Assign virtual register defs.
+ for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
+ MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ Register Reg = MO.getReg();
+ if (Reg.isVirtual())
+ defineVirtReg(MI, I, Reg);
+ }
}
- continue;
}
- if (!MRI->isAllocatable(Reg)) continue;
- if (MO.isUse()) {
- usePhysReg(MO);
- } else if (MO.isEarlyClobber()) {
- definePhysReg(MI, Reg,
- (MO.isImplicit() || MO.isDead()) ? regFree : regReserved);
- hasEarlyClobbers = true;
- } else
- hasPhysDefs = true;
+
+ // Free registers occupied by defs.
+ // Iterate operands in reverse order, so we see the implicit super register
+ // defs first (we added them earlier in case of <def,read-undef>).
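+ // E.g. for an undef subregister def (illustrative MIR)
+ //   undef %v.sub_32:gr64 = ...
+ // setPhysReg() appended an implicit def of the full 64-bit register; the
+ // reverse walk visits that implicit operand first and frees the whole
+ // register, while the explicit subregister operand below merely clears its
+ // subreg marker.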
+ for (unsigned I = MI.getNumOperands(); I-- > 0;) {
+ MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+
+ // subreg defs don't free the full register. We left the subreg number
+ // around as a marker in setPhysReg() to recognize this case here.
+ if (MO.getSubReg() != 0) {
+ MO.setSubReg(0);
+ continue;
+ }
+
+ // Do not free tied operands and early clobbers.
+ if (MO.isTied() || MO.isEarlyClobber())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ assert(Reg.isPhysical());
+ if (MRI->isReserved(Reg))
+ continue;
+ freePhysReg(Reg);
+ unmarkRegUsedInInstr(Reg);
+ }
}
- // The instruction may have virtual register operands that must be allocated
- // the same register at use-time and def-time: early clobbers and tied
- // operands. If there are also physical defs, these registers must avoid
- // both physical defs and uses, making them more constrained than normal
- // operands.
- // Similarly, if there are multiple defs and tied operands, we must make
- // sure the same register is allocated to uses and defs.
- // We didn't detect inline asm tied operands above, so just make this extra
- // pass for all inline asm.
- if (MI.isInlineAsm() || hasEarlyClobbers || hasPartialRedefs ||
- (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) {
- handleThroughOperands(MI, VirtDead);
- // Don't attempt coalescing when we have funny stuff going on.
- CopyDstReg = Register();
- // Pretend we have early clobbers so the use operands get marked below.
- // This is not necessary for the common case of a single tied use.
- hasEarlyClobbers = true;
+ // Displace clobbered registers.
+ if (HasRegMask) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isRegMask()) {
+ // MRI bookkeeping.
+ MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
+
+ // Displace clobbered registers.
+ const uint32_t *Mask = MO.getRegMask();
+ for (LiveRegMap::iterator LRI = LiveVirtRegs.begin(),
+ LRIE = LiveVirtRegs.end(); LRI != LRIE; ++LRI) {
+ MCPhysReg PhysReg = LRI->PhysReg;
+ if (PhysReg != 0 && MachineOperand::clobbersPhysReg(Mask, PhysReg))
+ displacePhysReg(MI, PhysReg);
+ }
+ }
+ }
}
- // Second scan.
- // Allocate virtreg uses.
+ // Apply pre-assigned register uses to state.
+ if (HasPhysRegUse) {
+ for (MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.readsReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg.isPhysical())
+ continue;
+ if (MRI->isReserved(Reg))
+ continue;
+ bool displacedAny = usePhysReg(MI, Reg);
+ if (!displacedAny && !MRI->isReserved(Reg))
+ MO.setIsKill(true);
+ }
+ }
+
+ // Allocate virtreg uses and insert reloads as necessary.
bool HasUndefUse = false;
- for (unsigned I = 0; I != VirtOpEnd; ++I) {
+ for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
MachineOperand &MO = MI.getOperand(I);
- if (!MO.isReg()) continue;
+ if (!MO.isReg() || !MO.isUse())
+ continue;
Register Reg = MO.getReg();
if (!Reg.isVirtual())
continue;
- if (MO.isUse()) {
- if (MO.isUndef()) {
- HasUndefUse = true;
- // There is no need to allocate a register for an undef use.
- continue;
- }
- // Populate MayLiveAcrossBlocks in case the use block is allocated before
- // the def block (removing the vreg uses).
- mayLiveIn(Reg);
-
- LiveReg &LR = reloadVirtReg(MI, I, Reg, CopyDstReg);
- MCPhysReg PhysReg = LR.PhysReg;
- CopySrcReg = (CopySrcReg == Reg || CopySrcReg == PhysReg) ? PhysReg : 0;
- if (setPhysReg(MI, MO, PhysReg))
- killVirtReg(LR);
+ if (MO.isUndef()) {
+ HasUndefUse = true;
+ continue;
}
+
+ // Populate MayLiveAcrossBlocks in case the use block is allocated before
+ // the def block (removing the vreg uses).
+ mayLiveIn(Reg);
+
+ assert(!MO.isInternalRead() && "Bundles not supported");
+ assert(MO.readsReg() && "reading use");
+ useVirtReg(MI, I, Reg);
}
// Allocate undef operands. This is a separate step because in a situation
@@ -1133,76 +1321,40 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
}
}
- // Track registers defined by instruction - early clobbers and tied uses at
- // this point.
- UsedInInstr.clear();
- if (hasEarlyClobbers) {
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg()) continue;
- Register Reg = MO.getReg();
- if (!Reg || !Reg.isPhysical())
+ // Free early clobbers.
+ if (HasEarlyClobber) {
+ for (unsigned I = MI.getNumOperands(); I-- > 0; ) {
+ MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber())
continue;
- // Look for physreg defs and tied uses.
- if (!MO.isDef() && !MO.isTied()) continue;
- markRegUsedInInstr(Reg);
- }
- }
-
- unsigned DefOpEnd = MI.getNumOperands();
- if (MI.isCall()) {
- // Spill all virtregs before a call. This serves one purpose: If an
- // exception is thrown, the landing pad is going to expect to find
- // registers in their spill slots.
- // Note: although this is appealing to just consider all definitions
- // as call-clobbered, this is not correct because some of those
- // definitions may be used later on and we do not want to reuse
- // those for virtual registers in between.
- LLVM_DEBUG(dbgs() << " Spilling remaining registers before call.\n");
- spillAll(MI, /*OnlyLiveOut*/ false);
- }
-
- // Third scan.
- // Mark all physreg defs as used before allocating virtreg defs.
- for (unsigned I = 0; I != DefOpEnd; ++I) {
- const MachineOperand &MO = MI.getOperand(I);
- if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
- continue;
- Register Reg = MO.getReg();
-
- if (!Reg || !Reg.isPhysical() || !MRI->isAllocatable(Reg))
- continue;
- definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved);
- }
+ // subreg defs don't free the full register. We left the subreg number
+ // around as a marker in setPhysReg() to recognize this case here.
+ if (MO.getSubReg() != 0) {
+ MO.setSubReg(0);
+ continue;
+ }
- // Fourth scan.
- // Allocate defs and collect dead defs.
- for (unsigned I = 0; I != DefOpEnd; ++I) {
- const MachineOperand &MO = MI.getOperand(I);
- if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
- continue;
- Register Reg = MO.getReg();
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ assert(Reg.isPhysical() && "should have register assigned");
+
+ // We sometimes get odd situations like:
+ // early-clobber %x0 = INSTRUCTION %x0
+ // which is semantically questionable as the early-clobber should
+ // apply before the use. But in practice we consider the use to
+ // happen before the early clobber now. Don't free the early clobber
+ // register in this case.
+ if (MI.readsRegister(Reg, TRI))
+ continue;
- // We have already dealt with phys regs in the previous scan.
- if (Reg.isPhysical())
- continue;
- MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, CopySrcReg);
- if (setPhysReg(MI, MI.getOperand(I), PhysReg)) {
- VirtDead.push_back(Reg);
- CopyDstReg = Register(); // cancel coalescing;
- } else
- CopyDstReg = (CopyDstReg == Reg || CopyDstReg == PhysReg) ? PhysReg : 0;
+ freePhysReg(Reg);
+ }
}
- // Kill dead defs after the scan to ensure that multiple defs of the same
- // register are allocated identically. We didn't need to do this for uses
- // because we are crerating our own kill flags, and they are always at the
- // last use.
- for (Register VirtReg : VirtDead)
- killVirtReg(VirtReg);
- VirtDead.clear();
-
LLVM_DEBUG(dbgs() << "<< " << MI);
- if (CopyDstReg && CopyDstReg == CopySrcReg && CopyDstSub == CopySrcSub) {
+ if (MI.isCopy() && MI.getOperand(0).getReg() == MI.getOperand(1).getReg() &&
+ MI.getNumOperands() == 2) {
LLVM_DEBUG(dbgs() << "Mark identity copy for removal\n");
Coalesced.push_back(&MI);
}
@@ -1219,23 +1371,22 @@ void RegAllocFast::handleDebugValue(MachineInstr &MI) {
if (!Register::isVirtualRegister(Reg))
return;
+ // Already spilled to a stackslot?
+ int SS = StackSlotForVirtReg[Reg];
+ if (SS != -1) {
+ // Modify DBG_VALUE now that the value is in a spill slot.
+ updateDbgValueForSpill(MI, SS);
+ LLVM_DEBUG(dbgs() << "Rewrite DBG_VALUE for spilled memory: " << MI);
+ return;
+ }
+
// See if this virtual register has already been allocated to a physical
// register or spilled to a stack slot.
LiveRegMap::iterator LRI = findLiveVirtReg(Reg);
if (LRI != LiveVirtRegs.end() && LRI->PhysReg) {
setPhysReg(MI, MO, LRI->PhysReg);
} else {
- int SS = StackSlotForVirtReg[Reg];
- if (SS != -1) {
- // Modify DBG_VALUE now that the value is in a spill slot.
- updateDbgValueForSpill(MI, SS);
- LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << MI);
- return;
- }
-
- // We can't allocate a physreg for a DebugValue, sorry!
- LLVM_DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
- MO.setReg(Register());
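+ // No physreg is assigned here; remember the DBG_VALUE so it can be fixed
+ // up later, or cleared at the end of the block if the register never
+ // materializes.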
+ DanglingDbgValues[Reg].push_back(&MI);
}
// If Reg hasn't been spilled, put this DBG_VALUE in LiveDbgValueMap so
@@ -1243,25 +1394,46 @@ void RegAllocFast::handleDebugValue(MachineInstr &MI) {
LiveDbgValueMap[Reg].push_back(&MI);
}
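+/// Rewrite the virtual register operands of the instructions bundled with
+/// \p MI (the BUNDLE header): the header was allocated by
+/// allocateInstruction(), and BundleVirtRegsMap records the physreg chosen
+/// for each virtual register; apply the same assignments here.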
+void RegAllocFast::handleBundle(MachineInstr &MI) {
+ MachineBasicBlock::instr_iterator BundledMI = MI.getIterator();
+ ++BundledMI;
+ while (BundledMI->isBundledWithPred()) {
+ for (unsigned I = 0; I < BundledMI->getNumOperands(); ++I) {
+ MachineOperand &MO = BundledMI->getOperand(I);
+ if (!MO.isReg())
+ continue;
+
+ Register Reg = MO.getReg();
+ if (!Reg.isVirtual())
+ continue;
+
+ DenseMap<Register, MCPhysReg>::iterator DI;
+ DI = BundleVirtRegsMap.find(Reg);
+ assert(DI != BundleVirtRegsMap.end() && "Unassigned virtual register");
+
+ setPhysReg(MI, MO, DI->second);
+ }
+
+ ++BundledMI;
+ }
+}
+
void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
this->MBB = &MBB;
LLVM_DEBUG(dbgs() << "\nAllocating " << MBB);
- PhysRegState.assign(TRI->getNumRegs(), regDisabled);
+ RegUnitStates.assign(TRI->getNumRegUnits(), regFree);
assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");
- MachineBasicBlock::iterator MII = MBB.begin();
-
- // Add live-in registers as live.
- for (const MachineBasicBlock::RegisterMaskPair &LI : MBB.liveins())
- if (MRI->isAllocatable(LI.PhysReg))
- definePhysReg(MII, LI.PhysReg, regReserved);
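+ // The block is allocated bottom-up, so registers live into any successor
+ // must be treated as occupied at the end of this block.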
+ for (MachineBasicBlock *Succ : MBB.successors()) {
+ for (const MachineBasicBlock::RegisterMaskPair &LI : Succ->liveins())
+ setPhysRegState(LI.PhysReg, regPreAssigned);
+ }
- VirtDead.clear();
Coalesced.clear();
- // Otherwise, sequentially allocate each instruction in the MBB.
- for (MachineInstr &MI : MBB) {
+ // Traverse block in reverse order allocating instructions one by one.
+ for (MachineInstr &MI : reverse(MBB)) {
LLVM_DEBUG(
dbgs() << "\n>> " << MI << "Regs:";
dumpState()
@@ -1275,11 +1447,22 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
}
allocateInstruction(MI);
+
+ // Once the BUNDLE header has been assigned registers, the same assignments
+ // need to be applied to the bundled MIs.
+ if (MI.getOpcode() == TargetOpcode::BUNDLE) {
+ handleBundle(MI);
+ }
}
+ LLVM_DEBUG(
+ dbgs() << "Begin Regs:";
+ dumpState()
+ );
+
// Spill all physical registers holding virtual registers now.
- LLVM_DEBUG(dbgs() << "Spilling live registers at end of block.\n");
- spillAll(MBB.getFirstTerminator(), /*OnlyLiveOut*/ true);
+ LLVM_DEBUG(dbgs() << "Loading live registers at beginning of block.\n");
+ reloadAtBegin(MBB);
// Erase all the coalesced copies. We are delaying it until now because
// LiveVirtRegs might refer to the instrs.
@@ -1287,6 +1470,20 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
MBB.erase(MI);
NumCoalesced += Coalesced.size();
+ for (auto &UDBGPair : DanglingDbgValues) {
+ for (MachineInstr *DbgValue : UDBGPair.second) {
+ assert(DbgValue->isDebugValue() && "expected DBG_VALUE");
+ MachineOperand &MO = DbgValue->getOperand(0);
+ // Nothing to do if the vreg was spilled in the meantime.
+ if (!MO.isReg())
+ continue;
+ LLVM_DEBUG(dbgs() << "Register did not survive for " << *DbgValue
+ << '\n');
+ MO.setReg(0);
+ }
+ }
+ DanglingDbgValues.clear();
+
LLVM_DEBUG(MBB.dump());
}
@@ -1300,8 +1497,11 @@ bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) {
MFI = &MF.getFrameInfo();
MRI->freezeReservedRegs(MF);
RegClassInfo.runOnMachineFunction(MF);
+ unsigned NumRegUnits = TRI->getNumRegUnits();
UsedInInstr.clear();
- UsedInInstr.setUniverse(TRI->getNumRegUnits());
+ UsedInInstr.setUniverse(NumRegUnits);
+ PhysRegUses.clear();
+ PhysRegUses.setUniverse(NumRegUnits);
// initialize the virtual->physical register map to have a 'null'
// mapping for all virtual registers
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 41cf00261265..166414e4ffa1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -147,7 +147,7 @@ class RAGreedy : public MachineFunctionPass,
// Convenient shortcuts.
using PQueue = std::priority_queue<std::pair<unsigned, unsigned>>;
using SmallLISet = SmallPtrSet<LiveInterval *, 4>;
- using SmallVirtRegSet = SmallSet<unsigned, 16>;
+ using SmallVirtRegSet = SmallSet<Register, 16>;
// context
MachineFunction *MF;
@@ -172,6 +172,7 @@ class RAGreedy : public MachineFunctionPass,
std::unique_ptr<Spiller> SpillerInstance;
PQueue Queue;
unsigned NextCascade;
+ std::unique_ptr<VirtRegAuxInfo> VRAI;
// Live ranges pass through a number of stages as we try to allocate them.
// Some of the stages may also create new live ranges:
@@ -247,19 +248,19 @@ class RAGreedy : public MachineFunctionPass,
IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo;
LiveRangeStage getStage(const LiveInterval &VirtReg) const {
- return ExtraRegInfo[VirtReg.reg].Stage;
+ return ExtraRegInfo[VirtReg.reg()].Stage;
}
void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) {
ExtraRegInfo.resize(MRI->getNumVirtRegs());
- ExtraRegInfo[VirtReg.reg].Stage = Stage;
+ ExtraRegInfo[VirtReg.reg()].Stage = Stage;
}
template<typename Iterator>
void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) {
ExtraRegInfo.resize(MRI->getNumVirtRegs());
for (;Begin != End; ++Begin) {
- unsigned Reg = *Begin;
+ Register Reg = *Begin;
if (ExtraRegInfo[Reg].Stage == RS_New)
ExtraRegInfo[Reg].Stage = NewStage;
}
@@ -290,8 +291,8 @@ class RAGreedy : public MachineFunctionPass,
public:
using EvictorInfo =
- std::pair<unsigned /* evictor */, unsigned /* physreg */>;
- using EvicteeInfo = llvm::DenseMap<unsigned /* evictee */, EvictorInfo>;
+ std::pair<Register /* evictor */, MCRegister /* physreg */>;
+ using EvicteeInfo = llvm::DenseMap<Register /* evictee */, EvictorInfo>;
private:
/// Each Vreg that has been evicted in the last stage of selectOrSplit will
@@ -307,14 +308,14 @@ class RAGreedy : public MachineFunctionPass,
/// longer relevant.
/// \param Evictee The evictee Vreg for whom we want to clear collected
/// eviction info.
- void clearEvicteeInfo(unsigned Evictee) { Evictees.erase(Evictee); }
+ void clearEvicteeInfo(Register Evictee) { Evictees.erase(Evictee); }
/// Track new eviction.
/// The Evictor vreg has evicted the Evictee vreg from Physreg.
/// \param PhysReg The physical register Evictee was evicted from.
/// \param Evictor The evictor Vreg that evicted Evictee.
/// \param Evictee The evictee Vreg.
- void addEviction(unsigned PhysReg, unsigned Evictor, unsigned Evictee) {
+ void addEviction(MCRegister PhysReg, Register Evictor, Register Evictee) {
Evictees[Evictee].first = Evictor;
Evictees[Evictee].second = PhysReg;
}
@@ -323,7 +324,7 @@ class RAGreedy : public MachineFunctionPass,
/// \param Evictee The evictee vreg.
/// \return The Evictor vreg which evicted Evictee vreg from PhysReg. 0 if
/// nobody has evicted Evictee from PhysReg.
- EvictorInfo getEvictor(unsigned Evictee) {
+ EvictorInfo getEvictor(Register Evictee) {
if (Evictees.count(Evictee)) {
return Evictees[Evictee];
}
@@ -348,7 +349,7 @@ class RAGreedy : public MachineFunctionPass,
/// Global live range splitting candidate info.
struct GlobalSplitCandidate {
// Register intended for assignment, or 0.
- unsigned PhysReg;
+ MCRegister PhysReg;
// SplitKit interval index for this candidate.
unsigned IntvIdx;
@@ -360,7 +361,7 @@ class RAGreedy : public MachineFunctionPass,
BitVector LiveBundles;
SmallVector<unsigned, 8> ActiveBlocks;
- void reset(InterferenceCache &Cache, unsigned Reg) {
+ void reset(InterferenceCache &Cache, MCRegister Reg) {
PhysReg = Reg;
IntvIdx = 0;
Intf.setPhysReg(Cache, Reg);
@@ -368,12 +369,12 @@ class RAGreedy : public MachineFunctionPass,
ActiveBlocks.clear();
}
- // Set B[i] = C for every live bundle where B[i] was NoCand.
+ // Set B[I] = C for every live bundle where B[I] was NoCand.
unsigned getBundles(SmallVectorImpl<unsigned> &B, unsigned C) {
unsigned Count = 0;
- for (unsigned i : LiveBundles.set_bits())
- if (B[i] == NoCand) {
- B[i] = C;
+ for (unsigned I : LiveBundles.set_bits())
+ if (B[I] == NoCand) {
+ B[I] = C;
Count++;
}
return Count;
@@ -417,7 +418,8 @@ public:
Spiller &spiller() override { return *SpillerInstance; }
void enqueue(LiveInterval *LI) override;
LiveInterval *dequeue() override;
- Register selectOrSplit(LiveInterval&, SmallVectorImpl<Register>&) override;
+ MCRegister selectOrSplit(LiveInterval &,
+ SmallVectorImpl<Register> &) override;
void aboutToRemoveInterval(LiveInterval &) override;
/// Perform register allocation.
@@ -428,15 +430,20 @@ public:
MachineFunctionProperties::Property::NoPHIs);
}
+ MachineFunctionProperties getClearedProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::IsSSA);
+ }
+
static char ID;
private:
- Register selectOrSplitImpl(LiveInterval &, SmallVectorImpl<Register> &,
- SmallVirtRegSet &, unsigned = 0);
+ MCRegister selectOrSplitImpl(LiveInterval &, SmallVectorImpl<Register> &,
+ SmallVirtRegSet &, unsigned = 0);
- bool LRE_CanEraseVirtReg(unsigned) override;
- void LRE_WillShrinkVirtReg(unsigned) override;
- void LRE_DidCloneVirtReg(unsigned, unsigned) override;
+ bool LRE_CanEraseVirtReg(Register) override;
+ void LRE_WillShrinkVirtReg(Register) override;
+ void LRE_DidCloneVirtReg(Register, Register) override;
void enqueue(PQueue &CurQueue, LiveInterval *LI);
LiveInterval *dequeue(PQueue &CurQueue);
@@ -444,7 +451,7 @@ private:
bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&);
bool addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
bool growRegion(GlobalSplitCandidate &Cand);
- bool splitCanCauseEvictionChain(unsigned Evictee, GlobalSplitCandidate &Cand,
+ bool splitCanCauseEvictionChain(Register Evictee, GlobalSplitCandidate &Cand,
unsigned BBNumber,
const AllocationOrder &Order);
bool splitCanCauseLocalSpill(unsigned VirtRegToSplit,
@@ -455,20 +462,20 @@ private:
bool *CanCauseEvictionChain);
bool calcCompactRegion(GlobalSplitCandidate&);
void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>);
- void calcGapWeights(unsigned, SmallVectorImpl<float>&);
+ void calcGapWeights(MCRegister, SmallVectorImpl<float> &);
Register canReassign(LiveInterval &VirtReg, Register PrevReg);
bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool);
- bool canEvictInterference(LiveInterval&, Register, bool, EvictionCost&,
- const SmallVirtRegSet&);
- bool canEvictInterferenceInRange(LiveInterval &VirtReg, Register oPhysReg,
+ bool canEvictInterference(LiveInterval &, MCRegister, bool, EvictionCost &,
+ const SmallVirtRegSet &);
+ bool canEvictInterferenceInRange(LiveInterval &VirtReg, MCRegister PhysReg,
SlotIndex Start, SlotIndex End,
EvictionCost &MaxCost);
- unsigned getCheapestEvicteeWeight(const AllocationOrder &Order,
- LiveInterval &VirtReg, SlotIndex Start,
- SlotIndex End, float *BestEvictWeight);
- void evictInterference(LiveInterval&, Register,
- SmallVectorImpl<Register>&);
- bool mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
+ MCRegister getCheapestEvicteeWeight(const AllocationOrder &Order,
+ LiveInterval &VirtReg, SlotIndex Start,
+ SlotIndex End, float *BestEvictWeight);
+ void evictInterference(LiveInterval &, MCRegister,
+ SmallVectorImpl<Register> &);
+ bool mayRecolorAllInterferences(MCRegister PhysReg, LiveInterval &VirtReg,
SmallLISet &RecoloringCandidates,
const SmallVirtRegSet &FixedRegisters);
@@ -478,8 +485,8 @@ private:
unsigned tryEvict(LiveInterval&, AllocationOrder&,
SmallVectorImpl<Register>&, unsigned,
const SmallVirtRegSet&);
- unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
- SmallVectorImpl<Register>&);
+ MCRegister tryRegionSplit(LiveInterval &, AllocationOrder &,
+ SmallVectorImpl<Register> &);
/// Calculate cost of region splitting.
unsigned calculateRegionSplitCost(LiveInterval &VirtReg,
AllocationOrder &Order,
@@ -492,9 +499,10 @@ private:
SmallVectorImpl<Register> &NewVRegs);
/// Check other options before using a callee-saved register for the first
/// time.
- unsigned tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order,
- Register PhysReg, unsigned &CostPerUseLimit,
- SmallVectorImpl<Register> &NewVRegs);
+ MCRegister tryAssignCSRFirstTime(LiveInterval &VirtReg,
+ AllocationOrder &Order, MCRegister PhysReg,
+ unsigned &CostPerUseLimit,
+ SmallVectorImpl<Register> &NewVRegs);
void initializeCSRCost();
unsigned tryBlockSplit(LiveInterval&, AllocationOrder&,
SmallVectorImpl<Register>&);
@@ -528,8 +536,8 @@ private:
};
using HintsInfo = SmallVector<HintInfo, 4>;
- BlockFrequency getBrokenHintFreq(const HintsInfo &, unsigned);
- void collectHintInfo(unsigned, HintsInfo &);
+ BlockFrequency getBrokenHintFreq(const HintsInfo &, MCRegister);
+ void collectHintInfo(Register, HintsInfo &);
bool isUnusedCalleeSavedReg(MCRegister PhysReg) const;
@@ -626,7 +634,7 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
// LiveRangeEdit delegate methods
//===----------------------------------------------------------------------===//
-bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
+bool RAGreedy::LRE_CanEraseVirtReg(Register VirtReg) {
LiveInterval &LI = LIS->getInterval(VirtReg);
if (VRM->hasPhys(VirtReg)) {
Matrix->unassign(LI);
@@ -641,7 +649,7 @@ bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
return false;
}
-void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) {
+void RAGreedy::LRE_WillShrinkVirtReg(Register VirtReg) {
if (!VRM->hasPhys(VirtReg))
return;
@@ -651,7 +659,7 @@ void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) {
enqueue(&LI);
}
-void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
+void RAGreedy::LRE_DidCloneVirtReg(Register New, Register Old) {
// Cloning a register we haven't even heard about yet? Just ignore it.
if (!ExtraRegInfo.inBounds(Old))
return;
@@ -677,9 +685,8 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
// Prioritize live ranges by size, assigning larger ranges first.
// The queue holds (size, reg) pairs.
const unsigned Size = LI->getSize();
- const unsigned Reg = LI->reg;
- assert(Register::isVirtualRegister(Reg) &&
- "Can only enqueue virtual registers");
+ const Register Reg = LI->reg();
+ assert(Reg.isVirtual() && "Can only enqueue virtual registers");
unsigned Prio;
ExtraRegInfo.grow(Reg);
@@ -756,26 +763,33 @@ Register RAGreedy::tryAssign(LiveInterval &VirtReg,
AllocationOrder &Order,
SmallVectorImpl<Register> &NewVRegs,
const SmallVirtRegSet &FixedRegisters) {
- Order.rewind();
Register PhysReg;
- while ((PhysReg = Order.next()))
- if (!Matrix->checkInterference(VirtReg, PhysReg))
- break;
- if (!PhysReg || Order.isHint())
+ for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
+ assert(*I);
+ if (!Matrix->checkInterference(VirtReg, *I)) {
+ if (I.isHint())
+ return *I;
+ else
+ PhysReg = *I;
+ }
+ }
+ if (!PhysReg.isValid())
return PhysReg;
// PhysReg is available, but there may be a better choice.
// If we missed a simple hint, try to cheaply evict interference from the
// preferred register.
- if (Register Hint = MRI->getSimpleHint(VirtReg.reg))
+ if (Register Hint = MRI->getSimpleHint(VirtReg.reg()))
if (Order.isHint(Hint)) {
- LLVM_DEBUG(dbgs() << "missed hint " << printReg(Hint, TRI) << '\n');
+ MCRegister PhysHint = Hint.asMCReg();
+ LLVM_DEBUG(dbgs() << "missed hint " << printReg(PhysHint, TRI) << '\n');
EvictionCost MaxCost;
MaxCost.setBrokenHints(1);
- if (canEvictInterference(VirtReg, Hint, true, MaxCost, FixedRegisters)) {
- evictInterference(VirtReg, Hint, NewVRegs);
- return Hint;
+ if (canEvictInterference(VirtReg, PhysHint, true, MaxCost,
+ FixedRegisters)) {
+ evictInterference(VirtReg, PhysHint, NewVRegs);
+ return PhysHint;
}
// Record the missed hint, we may be able to recover
// at the end if the surrounding allocation changed.
@@ -800,13 +814,14 @@ Register RAGreedy::tryAssign(LiveInterval &VirtReg,
//===----------------------------------------------------------------------===//
Register RAGreedy::canReassign(LiveInterval &VirtReg, Register PrevReg) {
- AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix);
- Register PhysReg;
- while ((PhysReg = Order.next())) {
- if (PhysReg == PrevReg)
+ auto Order =
+ AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
+ MCRegister PhysReg;
+ for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
+ if ((*I).id() == PrevReg.id())
continue;
- MCRegUnitIterator Units(PhysReg, TRI);
+ MCRegUnitIterator Units(*I, TRI);
for (; Units.isValid(); ++Units) {
// Instantiate a "subquery", not to be confused with the Queries array.
LiveIntervalUnion::Query subQ(VirtReg, Matrix->getLiveUnions()[*Units]);
@@ -815,7 +830,7 @@ Register RAGreedy::canReassign(LiveInterval &VirtReg, Register PrevReg) {
}
// If no units have interference, break out with the current PhysReg.
if (!Units.isValid())
- break;
+ PhysReg = *I;
}
if (PhysReg)
LLVM_DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
@@ -846,8 +861,8 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
if (CanSplit && IsHint && !BreaksHint)
return true;
- if (A.weight > B.weight) {
- LLVM_DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight << '\n');
+ if (A.weight() > B.weight()) {
+ LLVM_DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight() << '\n');
return true;
}
return false;
@@ -862,7 +877,7 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
/// @param MaxCost Only look for cheaper candidates and update with new cost
/// when returning true.
/// @returns True when interference can be evicted cheaper than MaxCost.
-bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, Register PhysReg,
+bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, MCRegister PhysReg,
bool IsHint, EvictionCost &MaxCost,
const SmallVirtRegSet &FixedRegisters) {
// It is only possible to evict virtual register interference.
@@ -878,7 +893,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, Register PhysReg,
//
// This works out so a register without a cascade number is allowed to evict
// anything, and it can be evicted by anything.
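// (E.g. a live range with cascade number 2 may evict interference carrying
// cascade 1 or none; evicting interference with cascade 2 or newer is only
// possible for urgent evictions.)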
- unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
+ unsigned Cascade = ExtraRegInfo[VirtReg.reg()].Cascade;
if (!Cascade)
Cascade = NextCascade;
@@ -890,15 +905,14 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, Register PhysReg,
return false;
// Check if any interfering live range is heavier than MaxWeight.
- for (unsigned i = Q.interferingVRegs().size(); i; --i) {
- LiveInterval *Intf = Q.interferingVRegs()[i - 1];
- assert(Register::isVirtualRegister(Intf->reg) &&
+ for (LiveInterval *Intf : reverse(Q.interferingVRegs())) {
+ assert(Register::isVirtualRegister(Intf->reg()) &&
"Only expecting virtual register interference from query");
// Do not allow eviction of a virtual register if we are in the middle
// of last-chance recoloring and this virtual register is one that we
// have scavenged a physical register for.
- if (FixedRegisters.count(Intf->reg))
+ if (FixedRegisters.count(Intf->reg()))
return false;
// Never evict spill products. They cannot split or spill.
@@ -910,12 +924,14 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, Register PhysReg,
//
// Also allow urgent evictions of unspillable ranges from a strictly
// larger allocation order.
- bool Urgent = !VirtReg.isSpillable() &&
- (Intf->isSpillable() ||
- RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg)) <
- RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(Intf->reg)));
+ bool Urgent =
+ !VirtReg.isSpillable() &&
+ (Intf->isSpillable() ||
+ RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg())) <
+ RegClassInfo.getNumAllocatableRegs(
+ MRI->getRegClass(Intf->reg())));
// Only evict older cascades or live ranges without a cascade.
- unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade;
+ unsigned IntfCascade = ExtraRegInfo[Intf->reg()].Cascade;
if (Cascade <= IntfCascade) {
if (!Urgent)
return false;
@@ -924,10 +940,10 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, Register PhysReg,
Cost.BrokenHints += 10;
}
// Would this break a satisfied hint?
- bool BreaksHint = VRM->hasPreferredPhys(Intf->reg);
+ bool BreaksHint = VRM->hasPreferredPhys(Intf->reg());
// Update eviction cost.
Cost.BrokenHints += BreaksHint;
- Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight);
+ Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight());
// Abort if this would be too expensive.
if (!(Cost < MaxCost))
return false;
@@ -960,7 +976,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, Register PhysReg,
/// when returning true.
/// \return True when interference can be evicted cheaper than MaxCost.
bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg,
- Register PhysReg, SlotIndex Start,
+ MCRegister PhysReg, SlotIndex Start,
SlotIndex End,
EvictionCost &MaxCost) {
EvictionCost Cost;
@@ -969,25 +985,23 @@ bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg,
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
// Check if any interfering live range is heavier than MaxWeight.
- for (unsigned i = Q.interferingVRegs().size(); i; --i) {
- LiveInterval *Intf = Q.interferingVRegs()[i - 1];
-
+ for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
// Check if the interference overlaps the segment of interest.
if (!Intf->overlaps(Start, End))
continue;
// Cannot evict non virtual reg interference.
- if (!Register::isVirtualRegister(Intf->reg))
+ if (!Register::isVirtualRegister(Intf->reg()))
return false;
// Never evict spill products. They cannot split or spill.
if (getStage(*Intf) == RS_Done)
return false;
// Would this break a satisfied hint?
- bool BreaksHint = VRM->hasPreferredPhys(Intf->reg);
+ bool BreaksHint = VRM->hasPreferredPhys(Intf->reg());
// Update eviction cost.
Cost.BrokenHints += BreaksHint;
- Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight);
+ Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight());
// Abort if this would be too expensive.
if (!(Cost < MaxCost))
return false;
@@ -1012,17 +1026,17 @@ bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg,
/// \param BestEvictweight The eviction cost of that eviction
/// \return The PhysReg which is the best candidate for eviction and the
/// eviction cost in BestEvictweight
-unsigned RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order,
- LiveInterval &VirtReg,
- SlotIndex Start, SlotIndex End,
- float *BestEvictweight) {
+MCRegister RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order,
+ LiveInterval &VirtReg,
+ SlotIndex Start, SlotIndex End,
+ float *BestEvictweight) {
EvictionCost BestEvictCost;
BestEvictCost.setMax();
- BestEvictCost.MaxWeight = VirtReg.weight;
- unsigned BestEvicteePhys = 0;
+ BestEvictCost.MaxWeight = VirtReg.weight();
+ MCRegister BestEvicteePhys;
// Go over all physical registers and find the best candidate for eviction
- for (auto PhysReg : Order.getOrder()) {
+ for (MCRegister PhysReg : Order.getOrder()) {
if (!canEvictInterferenceInRange(VirtReg, PhysReg, Start, End,
BestEvictCost))
@@ -1038,14 +1052,14 @@ unsigned RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order,
/// evictInterference - Evict any interfering registers that prevent VirtReg
/// from being assigned to Physreg. This assumes that canEvictInterference
/// returned true.
-void RAGreedy::evictInterference(LiveInterval &VirtReg, Register PhysReg,
+void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg,
SmallVectorImpl<Register> &NewVRegs) {
// Make sure that VirtReg has a cascade number, and assign that cascade
// number to every evicted register. These live ranges can then only be
// evicted by a newer cascade, preventing infinite loops.
- unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
+ unsigned Cascade = ExtraRegInfo[VirtReg.reg()].Cascade;
if (!Cascade)
- Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++;
+ Cascade = ExtraRegInfo[VirtReg.reg()].Cascade = NextCascade++;
LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI)
<< " interference: Cascade " << Cascade << '\n');
@@ -1064,21 +1078,20 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, Register PhysReg,
}
// Evict them second. This will invalidate the queries.
- for (unsigned i = 0, e = Intfs.size(); i != e; ++i) {
- LiveInterval *Intf = Intfs[i];
+ for (LiveInterval *Intf : Intfs) {
// The same VirtReg may be present in multiple RegUnits. Skip duplicates.
- if (!VRM->hasPhys(Intf->reg))
+ if (!VRM->hasPhys(Intf->reg()))
continue;
- LastEvicted.addEviction(PhysReg, VirtReg.reg, Intf->reg);
+ LastEvicted.addEviction(PhysReg, VirtReg.reg(), Intf->reg());
Matrix->unassign(*Intf);
- assert((ExtraRegInfo[Intf->reg].Cascade < Cascade ||
+ assert((ExtraRegInfo[Intf->reg()].Cascade < Cascade ||
VirtReg.isSpillable() < Intf->isSpillable()) &&
"Cannot decrease cascade number, illegal eviction");
- ExtraRegInfo[Intf->reg].Cascade = Cascade;
+ ExtraRegInfo[Intf->reg()].Cascade = Cascade;
++NumEvicted;
- NewVRegs.push_back(Intf->reg);
+ NewVRegs.push_back(Intf->reg());
}
}
@@ -1107,17 +1120,17 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
// Keep track of the cheapest interference seen so far.
EvictionCost BestCost;
BestCost.setMax();
- unsigned BestPhys = 0;
+ MCRegister BestPhys;
unsigned OrderLimit = Order.getOrder().size();
// When we are just looking for a reduced cost per use, don't break any
// hints, and only evict smaller spill weights.
if (CostPerUseLimit < ~0u) {
BestCost.BrokenHints = 0;
- BestCost.MaxWeight = VirtReg.weight;
+ BestCost.MaxWeight = VirtReg.weight();
// Check if any registers in RC are below CostPerUseLimit.
- const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg);
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg());
unsigned MinCost = RegClassInfo.getMinCost(RC);
if (MinCost >= CostPerUseLimit) {
LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = "
@@ -1134,8 +1147,10 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
}
}
- Order.rewind();
- while (MCRegister PhysReg = Order.next(OrderLimit)) {
+ for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E;
+ ++I) {
+ MCRegister PhysReg = *I;
+ assert(PhysReg);
if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
continue;
// The first use of a callee-saved register in a function has cost 1.
@@ -1156,7 +1171,7 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
BestPhys = PhysReg;
// Stop if the hint can be used.
- if (Order.isHint())
+ if (I.isHint())
break;
}
@@ -1183,9 +1198,9 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
// Reset interference dependent info.
SplitConstraints.resize(UseBlocks.size());
BlockFrequency StaticCost = 0;
- for (unsigned i = 0; i != UseBlocks.size(); ++i) {
- const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
- SpillPlacement::BlockConstraint &BC = SplitConstraints[i];
+ for (unsigned I = 0; I != UseBlocks.size(); ++I) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
+ SpillPlacement::BlockConstraint &BC = SplitConstraints[I];
BC.Number = BI.MBB->getNumber();
Intf.moveToBlock(BC.Number);
@@ -1256,8 +1271,7 @@ bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
unsigned TBS[GroupSize];
unsigned B = 0, T = 0;
- for (unsigned i = 0; i != Blocks.size(); ++i) {
- unsigned Number = Blocks[i];
+ for (unsigned Number : Blocks) {
Intf.moveToBlock(Number);
if (!Intf.hasInterference()) {
@@ -1314,8 +1328,7 @@ bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
while (true) {
ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
// Find new through blocks in the periphery of PrefRegBundles.
- for (int i = 0, e = NewBundles.size(); i != e; ++i) {
- unsigned Bundle = NewBundles[i];
+ for (unsigned Bundle : NewBundles) {
// Look at all blocks connected to Bundle in the full graph.
ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end();
@@ -1367,7 +1380,7 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
return false;
// Compact regions don't correspond to any physreg.
- Cand.reset(IntfCache, 0);
+ Cand.reset(IntfCache, MCRegister::NoRegister);
LLVM_DEBUG(dbgs() << "Compact region bundles");
@@ -1395,8 +1408,8 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
}
LLVM_DEBUG({
- for (int i : Cand.LiveBundles.set_bits())
- dbgs() << " EB#" << i;
+ for (int I : Cand.LiveBundles.set_bits())
+ dbgs() << " EB#" << I;
dbgs() << ".\n";
});
return true;
@@ -1407,8 +1420,7 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
BlockFrequency RAGreedy::calcSpillCost() {
BlockFrequency Cost = 0;
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
- for (unsigned i = 0; i != UseBlocks.size(); ++i) {
- const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
unsigned Number = BI.MBB->getNumber();
// We normally only need one spill instruction - a load or a store.
Cost += SpillPlacer->getBlockFrequency(Number);
@@ -1473,20 +1485,20 @@ BlockFrequency RAGreedy::calcSpillCost() {
/// artifact of Evictee.
/// \return True if splitting Evictee may cause a bad eviction chain, false
/// otherwise.
-bool RAGreedy::splitCanCauseEvictionChain(unsigned Evictee,
+bool RAGreedy::splitCanCauseEvictionChain(Register Evictee,
GlobalSplitCandidate &Cand,
unsigned BBNumber,
const AllocationOrder &Order) {
EvictionTrack::EvictorInfo VregEvictorInfo = LastEvicted.getEvictor(Evictee);
unsigned Evictor = VregEvictorInfo.first;
- unsigned PhysReg = VregEvictorInfo.second;
+ MCRegister PhysReg = VregEvictorInfo.second;
// No actual evictor.
if (!Evictor || !PhysReg)
return false;
float MaxWeight = 0;
- unsigned FutureEvictedPhysReg =
+ MCRegister FutureEvictedPhysReg =
getCheapestEvicteeWeight(Order, LIS->getInterval(Evictee),
Cand.Intf.first(), Cand.Intf.last(), &MaxWeight);
@@ -1511,10 +1523,9 @@ bool RAGreedy::splitCanCauseEvictionChain(unsigned Evictee,
// Now, check to see if the local interval we will create is going to be
// expensive enough to evict somebody If so, this may cause a bad eviction
// chain.
- VirtRegAuxInfo VRAI(*MF, *LIS, VRM, getAnalysis<MachineLoopInfo>(), *MBFI);
float splitArtifactWeight =
- VRAI.futureWeight(LIS->getInterval(Evictee),
- Cand.Intf.first().getPrevIndex(), Cand.Intf.last());
+ VRAI->futureWeight(LIS->getInterval(Evictee),
+ Cand.Intf.first().getPrevIndex(), Cand.Intf.last());
if (splitArtifactWeight >= 0 && splitArtifactWeight < MaxWeight)
return false;
@@ -1548,16 +1559,15 @@ bool RAGreedy::splitCanCauseLocalSpill(unsigned VirtRegToSplit,
// Check if the local interval will evict a cheaper interval.
float CheapestEvictWeight = 0;
- unsigned FutureEvictedPhysReg = getCheapestEvicteeWeight(
+ MCRegister FutureEvictedPhysReg = getCheapestEvicteeWeight(
Order, LIS->getInterval(VirtRegToSplit), Cand.Intf.first(),
Cand.Intf.last(), &CheapestEvictWeight);
// Have we found an interval that can be evicted?
if (FutureEvictedPhysReg) {
- VirtRegAuxInfo VRAI(*MF, *LIS, VRM, getAnalysis<MachineLoopInfo>(), *MBFI);
float splitArtifactWeight =
- VRAI.futureWeight(LIS->getInterval(VirtRegToSplit),
- Cand.Intf.first().getPrevIndex(), Cand.Intf.last());
+ VRAI->futureWeight(LIS->getInterval(VirtRegToSplit),
+ Cand.Intf.first().getPrevIndex(), Cand.Intf.last());
// Will the weight of the local interval be higher than the cheapest evictee
// weight? If so it will evict it and will not cause a spill.
if (splitArtifactWeight >= 0 && splitArtifactWeight > CheapestEvictWeight)
@@ -1578,11 +1588,11 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
bool *CanCauseEvictionChain) {
BlockFrequency GlobalCost = 0;
const BitVector &LiveBundles = Cand.LiveBundles;
- unsigned VirtRegToSplit = SA->getParent().reg;
+ Register VirtRegToSplit = SA->getParent().reg();
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
- for (unsigned i = 0; i != UseBlocks.size(); ++i) {
- const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
- SpillPlacement::BlockConstraint &BC = SplitConstraints[i];
+ for (unsigned I = 0; I != UseBlocks.size(); ++I) {
+ const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
+ SpillPlacement::BlockConstraint &BC = SplitConstraints[I];
bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, false)];
bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)];
unsigned Ins = 0;
@@ -1620,8 +1630,7 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
}
- for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) {
- unsigned Number = Cand.ActiveBlocks[i];
+ for (unsigned Number : Cand.ActiveBlocks) {
bool RegIn = LiveBundles[Bundles->getBundle(Number, false)];
bool RegOut = LiveBundles[Bundles->getBundle(Number, true)];
if (!RegIn && !RegOut)
@@ -1679,13 +1688,12 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
// Isolate even single instructions when dealing with a proper sub-class.
// That guarantees register class inflation for the stack interval because it
// is all copies.
- unsigned Reg = SA->getParent().reg;
+ Register Reg = SA->getParent().reg();
bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
// First handle all the blocks with uses.
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
- for (unsigned i = 0; i != UseBlocks.size(); ++i) {
- const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
unsigned Number = BI.MBB->getNumber();
unsigned IntvIn = 0, IntvOut = 0;
SlotIndex IntfIn, IntfOut;
@@ -1730,8 +1738,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
BitVector Todo = SA->getThroughBlocks();
for (unsigned c = 0; c != UsedCands.size(); ++c) {
ArrayRef<unsigned> Blocks = GlobalCand[UsedCands[c]].ActiveBlocks;
- for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
- unsigned Number = Blocks[i];
+ for (unsigned Number : Blocks) {
if (!Todo.test(Number))
continue;
Todo.reset(Number);
@@ -1774,8 +1781,8 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
// - Candidate intervals can be assigned to Cand.PhysReg.
// - Block-local splits are candidates for local splitting.
// - DCE leftovers should go back on the queue.
- for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
- LiveInterval &Reg = LIS->getInterval(LREdit.get(i));
+ for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
+ LiveInterval &Reg = LIS->getInterval(LREdit.get(I));
// Ignore old intervals from DCE.
if (getStage(Reg) != RS_New)
@@ -1783,14 +1790,14 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
// Remainder interval. Don't try splitting again, spill if it doesn't
// allocate.
- if (IntvMap[i] == 0) {
+ if (IntvMap[I] == 0) {
setStage(Reg, RS_Spill);
continue;
}
// Global intervals. Allow repeated splitting as long as the number of live
// blocks is strictly decreasing.
- if (IntvMap[i] < NumGlobalIntvs) {
+ if (IntvMap[I] < NumGlobalIntvs) {
if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
LLVM_DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
<< " blocks as original.\n");
@@ -1808,10 +1815,11 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
MF->verify(this, "After splitting live range around region");
}
-unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
- SmallVectorImpl<Register> &NewVRegs) {
+MCRegister RAGreedy::tryRegionSplit(LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ SmallVectorImpl<Register> &NewVRegs) {
if (!TRI->shouldRegionSplitForVirtReg(*MF, VirtReg))
- return 0;
+ return MCRegister::NoRegister;
unsigned NumCands = 0;
BlockFrequency SpillCost = calcSpillCost();
BlockFrequency BestCost;
@@ -1841,12 +1849,12 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// current max frequency.
if (HasCompact && (BestCost > SpillCost) && (BestCand != NoCand) &&
CanCauseEvictionChain) {
- return 0;
+ return MCRegister::NoRegister;
}
// No solutions found, fall back to single block splitting.
if (!HasCompact && BestCand == NoCand)
- return 0;
+ return MCRegister::NoRegister;
return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs);
}
@@ -1857,8 +1865,8 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
unsigned &NumCands, bool IgnoreCSR,
bool *CanCauseEvictionChain) {
unsigned BestCand = NoCand;
- Order.rewind();
- while (unsigned PhysReg = Order.next()) {
+ for (MCPhysReg PhysReg : Order) {
+ assert(PhysReg);
if (IgnoreCSR && isUnusedCalleeSavedReg(PhysReg))
continue;
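
The loop-header change above recurs throughout this file: the old rewind()/next() cursor protocol, which used physreg 0 as its end sentinel, becomes ordinary C++ iteration. A self-contained sketch of the two idioms (toy order type with an assumed shape, not the real AllocationOrder):

    // Illustration only: a stripped-down allocation order.
    #include <cstdint>
    #include <vector>

    using MCPhysReg = uint16_t;

    struct ToyAllocationOrder {
      std::vector<MCPhysReg> Regs; // precomputed order, hinted regs first
      auto begin() const { return Regs.begin(); }
      auto end() const { return Regs.end(); }
    };

    void scan(const ToyAllocationOrder &Order) {
      // Old cursor style (removed by this patch):
      //   Order.rewind();
      //   while (unsigned PhysReg = Order.next()) { /* use PhysReg */ }
      // New range style; 0 no longer terminates the loop, which is why
      // the rewritten loops assert(PhysReg) instead of relying on it.
      for (MCPhysReg PhysReg : Order)
        (void)PhysReg; // use PhysReg
    }
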
@@ -1867,12 +1875,12 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
if (NumCands == IntfCache.getMaxCursors()) {
unsigned WorstCount = ~0u;
unsigned Worst = 0;
- for (unsigned i = 0; i != NumCands; ++i) {
- if (i == BestCand || !GlobalCand[i].PhysReg)
+ for (unsigned CandIndex = 0; CandIndex != NumCands; ++CandIndex) {
+ if (CandIndex == BestCand || !GlobalCand[CandIndex].PhysReg)
continue;
- unsigned Count = GlobalCand[i].LiveBundles.count();
+ unsigned Count = GlobalCand[CandIndex].LiveBundles.count();
if (Count < WorstCount) {
- Worst = i;
+ Worst = CandIndex;
WorstCount = Count;
}
}
@@ -1923,8 +1931,8 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
LLVM_DEBUG({
dbgs() << ", total = ";
MBFI->printBlockFreq(dbgs(), Cost) << " with bundles";
- for (int i : Cand.LiveBundles.set_bits())
- dbgs() << " EB#" << i;
+ for (int I : Cand.LiveBundles.set_bits())
+ dbgs() << " EB#" << I;
dbgs() << ".\n";
});
if (Cost < BestCost) {
@@ -1942,7 +1950,7 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
      // See splitCanCauseEvictionChain for a detailed description of bad
// eviction chain scenarios.
LLVM_DEBUG(dbgs() << "Best split candidate of vreg "
- << printReg(VirtReg.reg, TRI) << " may ");
+ << printReg(VirtReg.reg(), TRI) << " may ");
if (!(*CanCauseEvictionChain))
LLVM_DEBUG(dbgs() << "not ");
LLVM_DEBUG(dbgs() << "cause bad eviction chain\n");
@@ -2001,13 +2009,12 @@ unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SmallVectorImpl<Register> &NewVRegs) {
assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
- Register Reg = VirtReg.reg;
+ Register Reg = VirtReg.reg();
bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
SE->reset(LREdit, SplitSpillMode);
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
- for (unsigned i = 0; i != UseBlocks.size(); ++i) {
- const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
+ for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
SE->splitSingleBlock(BI);
}
@@ -2026,9 +2033,9 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// Sort out the new intervals created by splitting. The remainder interval
// goes straight to spilling, the new local ranges get to stay RS_New.
- for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
- LiveInterval &LI = LIS->getInterval(LREdit.get(i));
- if (getStage(LI) == RS_New && IntvMap[i] == 0)
+ for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
+ LiveInterval &LI = LIS->getInterval(LREdit.get(I));
+ if (getStage(LI) == RS_New && IntvMap[I] == 0)
setStage(LI, RS_Spill);
}
@@ -2044,7 +2051,7 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
/// Get the number of allocatable registers that match the constraints of \p Reg
/// on \p MI and that are also in \p SuperRC.
static unsigned getNumAllocatableRegsForConstraints(
- const MachineInstr *MI, unsigned Reg, const TargetRegisterClass *SuperRC,
+ const MachineInstr *MI, Register Reg, const TargetRegisterClass *SuperRC,
const TargetInstrInfo *TII, const TargetRegisterInfo *TRI,
const RegisterClassInfo &RCI) {
assert(SuperRC && "Invalid register class");
@@ -2067,7 +2074,7 @@ static unsigned getNumAllocatableRegsForConstraints(
unsigned
RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SmallVectorImpl<Register> &NewVRegs) {
- const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg);
+ const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
// There is no point to this if there are no larger sub-classes.
if (!RegClassInfo.isProperSubClass(CurRC))
return 0;
@@ -2091,18 +2098,18 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// the constraints on the virtual register.
// Otherwise, splitting just inserts uncoalescable copies that do not help
// the allocation.
- for (unsigned i = 0; i != Uses.size(); ++i) {
- if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i]))
+ for (const auto &Use : Uses) {
+ if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use))
if (MI->isFullCopy() ||
SuperRCNumAllocatableRegs ==
- getNumAllocatableRegsForConstraints(MI, VirtReg.reg, SuperRC, TII,
- TRI, RCI)) {
- LLVM_DEBUG(dbgs() << " skip:\t" << Uses[i] << '\t' << *MI);
+ getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC,
+ TII, TRI, RCI)) {
+ LLVM_DEBUG(dbgs() << " skip:\t" << Use << '\t' << *MI);
continue;
}
SE->openIntv();
- SlotIndex SegStart = SE->enterIntvBefore(Uses[i]);
- SlotIndex SegStop = SE->leaveIntvAfter(Uses[i]);
+ SlotIndex SegStart = SE->enterIntvBefore(Use);
+ SlotIndex SegStop = SE->leaveIntvAfter(Use);
SE->useIntv(SegStart, SegStop);
}
@@ -2113,7 +2120,7 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SmallVector<unsigned, 8> IntvMap;
SE->finish(&IntvMap);
- DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS);
+ DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
ExtraRegInfo.resize(MRI->getNumVirtRegs());
// Assign all new registers to RS_Spill. This was the last chance.
@@ -2128,9 +2135,9 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
/// calcGapWeights - Compute the maximum spill weight that needs to be evicted
/// in order to use PhysReg between two entries in SA->UseSlots.
///
-/// GapWeight[i] represents the gap between UseSlots[i] and UseSlots[i+1].
+/// GapWeight[I] represents the gap between UseSlots[I] and UseSlots[I + 1].
///
-void RAGreedy::calcGapWeights(unsigned PhysReg,
+void RAGreedy::calcGapWeights(MCRegister PhysReg,
SmallVectorImpl<float> &GapWeight) {
assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
@@ -2169,7 +2176,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
break;
// Update the gaps covered by IntI.
- const float weight = IntI.value()->weight;
+ const float weight = IntI.value()->weight();
for (; Gap != NumGaps; ++Gap) {
GapWeight[Gap] = std::max(GapWeight[Gap], weight);
if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
@@ -2231,8 +2238,8 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
LLVM_DEBUG({
dbgs() << "tryLocalSplit: ";
- for (unsigned i = 0, e = Uses.size(); i != e; ++i)
- dbgs() << ' ' << Uses[i];
+ for (const auto &Use : Uses)
+ dbgs() << ' ' << Use;
dbgs() << '\n';
});
@@ -2244,25 +2251,25 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
LLVM_DEBUG(dbgs() << RMS.size() << " regmasks in block:");
// Constrain to VirtReg's live range.
- unsigned ri =
+ unsigned RI =
llvm::lower_bound(RMS, Uses.front().getRegSlot()) - RMS.begin();
- unsigned re = RMS.size();
- for (unsigned i = 0; i != NumGaps && ri != re; ++i) {
- // Look for Uses[i] <= RMS <= Uses[i+1].
- assert(!SlotIndex::isEarlierInstr(RMS[ri], Uses[i]));
- if (SlotIndex::isEarlierInstr(Uses[i+1], RMS[ri]))
+ unsigned RE = RMS.size();
+ for (unsigned I = 0; I != NumGaps && RI != RE; ++I) {
+ // Look for Uses[I] <= RMS <= Uses[I + 1].
+ assert(!SlotIndex::isEarlierInstr(RMS[RI], Uses[I]));
+ if (SlotIndex::isEarlierInstr(Uses[I + 1], RMS[RI]))
continue;
// Skip a regmask on the same instruction as the last use. It doesn't
// overlap the live range.
- if (SlotIndex::isSameInstr(Uses[i+1], RMS[ri]) && i+1 == NumGaps)
+ if (SlotIndex::isSameInstr(Uses[I + 1], RMS[RI]) && I + 1 == NumGaps)
break;
- LLVM_DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-'
- << Uses[i + 1]);
- RegMaskGaps.push_back(i);
+ LLVM_DEBUG(dbgs() << ' ' << RMS[RI] << ':' << Uses[I] << '-'
+ << Uses[I + 1]);
+ RegMaskGaps.push_back(I);
      // Advance RI to the next gap. A regmask on one of the uses counts in
// both gaps.
- while (ri != re && SlotIndex::isEarlierInstr(RMS[ri], Uses[i+1]))
- ++ri;
+ while (RI != RE && SlotIndex::isEarlierInstr(RMS[RI], Uses[I + 1]))
+ ++RI;
}
LLVM_DEBUG(dbgs() << '\n');
}
@@ -2297,16 +2304,16 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
(1.0f / MBFI->getEntryFreq());
SmallVector<float, 8> GapWeight;
- Order.rewind();
- while (unsigned PhysReg = Order.next()) {
+ for (MCPhysReg PhysReg : Order) {
+ assert(PhysReg);
// Keep track of the largest spill weight that would need to be evicted in
- // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1].
+ // order to make use of PhysReg between UseSlots[I] and UseSlots[I + 1].
calcGapWeights(PhysReg, GapWeight);
// Remove any gaps with regmask clobbers.
if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
- for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i)
- GapWeight[RegMaskGaps[i]] = huge_valf;
+ for (unsigned I = 0, E = RegMaskGaps.size(); I != E; ++I)
+ GapWeight[RegMaskGaps[I]] = huge_valf;
// Try to find the best sequence of gaps to close.
// The new spill weight must be larger than any gap interference.
@@ -2324,7 +2331,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;
LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << ' ' << Uses[SplitBefore]
- << '-' << Uses[SplitAfter] << " i=" << MaxGap);
+ << '-' << Uses[SplitAfter] << " I=" << MaxGap);
// Stop before the interval gets so big we wouldn't be making progress.
if (!LiveBefore && !LiveAfter) {
@@ -2373,8 +2380,8 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// Recompute the max when necessary.
if (GapWeight[SplitBefore - 1] >= MaxGap) {
MaxGap = GapWeight[SplitBefore];
- for (unsigned i = SplitBefore + 1; i != SplitAfter; ++i)
- MaxGap = std::max(MaxGap, GapWeight[i]);
+ for (unsigned I = SplitBefore + 1; I != SplitAfter; ++I)
+ MaxGap = std::max(MaxGap, GapWeight[I]);
}
continue;
}
@@ -2409,7 +2416,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SE->useIntv(SegStart, SegStop);
SmallVector<unsigned, 8> IntvMap;
SE->finish(&IntvMap);
- DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS);
+ DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
// If the new range has the same number of instructions as before, mark it as
// RS_Split2 so the next split will be forced to make progress. Otherwise,
@@ -2420,10 +2427,10 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
if (NewGaps >= NumGaps) {
LLVM_DEBUG(dbgs() << "Tagging non-progress ranges: ");
assert(!ProgressRequired && "Didn't make progress when it was required.");
- for (unsigned i = 0, e = IntvMap.size(); i != e; ++i)
- if (IntvMap[i] == 1) {
- setStage(LIS->getInterval(LREdit.get(i)), RS_Split2);
- LLVM_DEBUG(dbgs() << printReg(LREdit.get(i)));
+ for (unsigned I = 0, E = IntvMap.size(); I != E; ++I)
+ if (IntvMap[I] == 1) {
+ setStage(LIS->getInterval(LREdit.get(I)), RS_Split2);
+ LLVM_DEBUG(dbgs() << printReg(LREdit.get(I)));
}
LLVM_DEBUG(dbgs() << '\n');
}
@@ -2477,7 +2484,7 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
// ranges already made dubious progress with region splitting, so they go
// straight to single block splitting.
if (getStage(VirtReg) < RS_Split2) {
- unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
+ MCRegister PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
if (PhysReg || !NewVRegs.empty())
return PhysReg;
}
@@ -2507,11 +2514,10 @@ static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) {
/// for \p VirtReg.
/// \p FixedRegisters contains all the virtual registers that cannot be
/// recolored.
-bool
-RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
- SmallLISet &RecoloringCandidates,
- const SmallVirtRegSet &FixedRegisters) {
- const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg);
+bool RAGreedy::mayRecolorAllInterferences(
+ MCRegister PhysReg, LiveInterval &VirtReg, SmallLISet &RecoloringCandidates,
+ const SmallVirtRegSet &FixedRegisters) {
+ const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
@@ -2523,16 +2529,16 @@ RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
CutOffInfo |= CO_Interf;
return false;
}
- for (unsigned i = Q.interferingVRegs().size(); i; --i) {
- LiveInterval *Intf = Q.interferingVRegs()[i - 1];
+ for (LiveInterval *Intf : reverse(Q.interferingVRegs())) {
      // If Intf is done and sits on the same register class as VirtReg,
// it would not be recolorable as it is in the same state as VirtReg.
// However, if VirtReg has tied defs and Intf doesn't, then
// there is still a point in examining if it can be recolorable.
if (((getStage(*Intf) == RS_Done &&
- MRI->getRegClass(Intf->reg) == CurRC) &&
- !(hasTiedDef(MRI, VirtReg.reg) && !hasTiedDef(MRI, Intf->reg))) ||
- FixedRegisters.count(Intf->reg)) {
+ MRI->getRegClass(Intf->reg()) == CurRC) &&
+ !(hasTiedDef(MRI, VirtReg.reg()) &&
+ !hasTiedDef(MRI, Intf->reg()))) ||
+ FixedRegisters.count(Intf->reg())) {
LLVM_DEBUG(
dbgs() << "Early abort: the interference is not recolorable.\n");
return false;
@@ -2587,6 +2593,9 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
SmallVectorImpl<Register> &NewVRegs,
SmallVirtRegSet &FixedRegisters,
unsigned Depth) {
+ if (!TRI->shouldUseLastChanceRecoloringForVirtReg(*MF, VirtReg))
+ return ~0u;
+
LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
// Ranges must be Done.
assert((getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
@@ -2605,15 +2614,15 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
SmallLISet RecoloringCandidates;
// Record the original mapping virtual register to physical register in case
// the recoloring fails.
- DenseMap<Register, Register> VirtRegToPhysReg;
+ DenseMap<Register, MCRegister> VirtRegToPhysReg;
  // Mark VirtReg as fixed, i.e., it will not be recolored past this point in
// this recoloring "session".
- assert(!FixedRegisters.count(VirtReg.reg));
- FixedRegisters.insert(VirtReg.reg);
+ assert(!FixedRegisters.count(VirtReg.reg()));
+ FixedRegisters.insert(VirtReg.reg());
SmallVector<Register, 4> CurrentNewVRegs;
- Order.rewind();
- while (Register PhysReg = Order.next()) {
+ for (MCRegister PhysReg : Order) {
+ assert(PhysReg.isValid());
LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
<< printReg(PhysReg, TRI) << '\n');
RecoloringCandidates.clear();
@@ -2644,7 +2653,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
for (SmallLISet::iterator It = RecoloringCandidates.begin(),
EndIt = RecoloringCandidates.end();
It != EndIt; ++It) {
- Register ItVirtReg = (*It)->reg;
+ Register ItVirtReg = (*It)->reg();
enqueue(RecoloringQueue, *It);
assert(VRM->hasPhys(ItVirtReg) &&
"Interferences are supposed to be with allocated variables");
@@ -2697,10 +2706,10 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
for (SmallLISet::iterator It = RecoloringCandidates.begin(),
EndIt = RecoloringCandidates.end();
It != EndIt; ++It) {
- Register ItVirtReg = (*It)->reg;
+ Register ItVirtReg = (*It)->reg();
if (VRM->hasPhys(ItVirtReg))
Matrix->unassign(**It);
- Register ItPhysReg = VirtRegToPhysReg[ItVirtReg];
+ MCRegister ItPhysReg = VirtRegToPhysReg[ItVirtReg];
Matrix->assign(**It, ItPhysReg);
}
}
@@ -2724,8 +2733,8 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
while (!RecoloringQueue.empty()) {
LiveInterval *LI = dequeue(RecoloringQueue);
LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
- Register PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters,
- Depth + 1);
+ MCRegister PhysReg =
+ selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1);
// When splitting happens, the live-range may actually be empty.
    // In that case, it is okay to continue the recoloring even
// if we did not find an alternative color for it. Indeed,
@@ -2743,7 +2752,7 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
<< " succeeded with: " << printReg(PhysReg, TRI) << '\n');
Matrix->assign(*LI, PhysReg);
- FixedRegisters.insert(LI->reg);
+ FixedRegisters.insert(LI->reg());
}
return true;
}
@@ -2752,12 +2761,12 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
// Main Entry Point
//===----------------------------------------------------------------------===//
-Register RAGreedy::selectOrSplit(LiveInterval &VirtReg,
- SmallVectorImpl<Register> &NewVRegs) {
+MCRegister RAGreedy::selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<Register> &NewVRegs) {
CutOffInfo = CO_None;
LLVMContext &Ctx = MF->getFunction().getContext();
SmallVirtRegSet FixedRegisters;
- Register Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters);
+ MCRegister Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters);
if (Reg == ~0U && (CutOffInfo != CO_None)) {
uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
if (CutOffEncountered == CO_Depth)
@@ -2782,11 +2791,10 @@ Register RAGreedy::selectOrSplit(LiveInterval &VirtReg,
/// Spilling a live range in the cold path can have lower cost than using
/// the CSR for the first time. Returns the physical register if we decide
/// to use the CSR; otherwise returns 0.
-unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg,
- AllocationOrder &Order,
- Register PhysReg,
- unsigned &CostPerUseLimit,
- SmallVectorImpl<Register> &NewVRegs) {
+MCRegister
+RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order,
+ MCRegister PhysReg, unsigned &CostPerUseLimit,
+ SmallVectorImpl<Register> &NewVRegs) {
if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
// We choose spill over using the CSR for the first time if the spill cost
// is lower than CSRCost.
@@ -2851,7 +2859,7 @@ void RAGreedy::initializeCSRCost() {
/// Collect the hint info for \p Reg.
/// The results are stored into \p Out.
/// \p Out is not cleared before being populated.
-void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) {
+void RAGreedy::collectHintInfo(Register Reg, HintsInfo &Out) {
for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
if (!Instr.isFullCopy())
continue;
@@ -2863,9 +2871,8 @@ void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) {
continue;
}
// Get the current assignment.
- Register OtherPhysReg = Register::isPhysicalRegister(OtherReg)
- ? OtherReg
- : VRM->getPhys(OtherReg);
+ MCRegister OtherPhysReg =
+ OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
// Push the collected information.
Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
OtherPhysReg));
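
collectHintInfo feeds getBrokenHintFreq, defined just below, which scores a candidate assignment by the frequency of the copies it would leave unsatisfied. A standalone model of that accumulation (toy types; frequencies here are plain integers rather than BlockFrequency):

    #include <cstdint>
    #include <vector>

    struct HintInfo {
      uint64_t Freq;    // block frequency of the copy instruction
      unsigned PhysReg; // physical register the copy-related vreg has now
    };

    // Sum the frequencies of hints that assigning CandPhysReg would break.
    uint64_t brokenHintFreq(const std::vector<HintInfo> &List,
                            unsigned CandPhysReg) {
      uint64_t Cost = 0;
      for (const HintInfo &Info : List)
        if (Info.PhysReg != CandPhysReg)
          Cost += Info.Freq;
      return Cost;
    }
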
@@ -2876,7 +2883,7 @@ void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) {
/// \p PhysReg was used.
/// \return The cost of \p List for \p PhysReg.
BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
- unsigned PhysReg) {
+ MCRegister PhysReg) {
BlockFrequency Cost = 0;
for (const HintInfo &Info : List) {
if (Info.PhysReg != PhysReg)
@@ -2897,11 +2904,11 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
// We have a broken hint, check if it is possible to fix it by
// reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
// some register and PhysReg may be available for the other live-ranges.
- SmallSet<unsigned, 4> Visited;
+ SmallSet<Register, 4> Visited;
SmallVector<unsigned, 2> RecoloringCandidates;
HintsInfo Info;
- unsigned Reg = VirtReg.reg;
- Register PhysReg = VRM->getPhys(Reg);
+ Register Reg = VirtReg.reg();
+ MCRegister PhysReg = VRM->getPhys(Reg);
// Start the recoloring algorithm from the input live-interval, then
// it will propagate to the ones that are copy-related with it.
Visited.insert(Reg);
@@ -2922,7 +2929,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
// Get the live interval mapped with this virtual register to be able
// to check for the interference with the new color.
LiveInterval &LI = LIS->getInterval(Reg);
- Register CurrPhys = VRM->getPhys(Reg);
+ MCRegister CurrPhys = VRM->getPhys(Reg);
// Check that the new color matches the register class constraints and
// that it is free for this live range.
if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) ||
@@ -3003,33 +3010,35 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
/// getting rid of 2 copies.
void RAGreedy::tryHintsRecoloring() {
for (LiveInterval *LI : SetOfBrokenHints) {
- assert(Register::isVirtualRegister(LI->reg) &&
+ assert(Register::isVirtualRegister(LI->reg()) &&
"Recoloring is possible only for virtual registers");
// Some dead defs may be around (e.g., because of debug uses).
// Ignore those.
- if (!VRM->hasPhys(LI->reg))
+ if (!VRM->hasPhys(LI->reg()))
continue;
tryHintRecoloring(*LI);
}
}
-Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
- SmallVectorImpl<Register> &NewVRegs,
- SmallVirtRegSet &FixedRegisters,
- unsigned Depth) {
+MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
+ SmallVectorImpl<Register> &NewVRegs,
+ SmallVirtRegSet &FixedRegisters,
+ unsigned Depth) {
unsigned CostPerUseLimit = ~0u;
// First try assigning a free register.
- AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix);
- if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) {
+ auto Order =
+ AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
+ if (MCRegister PhysReg =
+ tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) {
    // If VirtReg got an assignment, the eviction info is no longer relevant.
- LastEvicted.clearEvicteeInfo(VirtReg.reg);
+ LastEvicted.clearEvicteeInfo(VirtReg.reg());
// When NewVRegs is not empty, we may have made decisions such as evicting
// a virtual register, go with the earlier decisions and use the physical
// register.
if (CSRCost.getFrequency() && isUnusedCalleeSavedReg(PhysReg) &&
NewVRegs.empty()) {
- Register CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
- CostPerUseLimit, NewVRegs);
+ MCRegister CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
+ CostPerUseLimit, NewVRegs);
if (CSRReg || !NewVRegs.empty())
// Return now if we decide to use a CSR or create new vregs due to
// pre-splitting.
@@ -3040,7 +3049,7 @@ Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
LiveRangeStage Stage = getStage(VirtReg);
LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade "
- << ExtraRegInfo[VirtReg.reg].Cascade << '\n');
+ << ExtraRegInfo[VirtReg.reg()].Cascade << '\n');
// Try to evict a less worthy live range, but only for ranges from the primary
// queue. The RS_Split ranges already failed to do this, and they should not
@@ -3049,7 +3058,7 @@ Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
if (Register PhysReg =
tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit,
FixedRegisters)) {
- Register Hint = MRI->getSimpleHint(VirtReg.reg);
+ Register Hint = MRI->getSimpleHint(VirtReg.reg());
// If VirtReg has a hint and that hint is broken record this
// virtual register as a recoloring candidate for broken hint.
// Indeed, since we evicted a variable in its neighborhood it is
@@ -3059,7 +3068,7 @@ Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
SetOfBrokenHints.insert(&VirtReg);
      // If VirtReg evicted someone, the eviction info for it as an evictee is
      // no longer relevant.
- LastEvicted.clearEvicteeInfo(VirtReg.reg);
+ LastEvicted.clearEvicteeInfo(VirtReg.reg());
return PhysReg;
}
@@ -3071,7 +3080,7 @@ Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
if (Stage < RS_Split) {
setStage(VirtReg, RS_Split);
LLVM_DEBUG(dbgs() << "wait for second round\n");
- NewVRegs.push_back(VirtReg.reg);
+ NewVRegs.push_back(VirtReg.reg());
return 0;
}
@@ -3081,7 +3090,7 @@ Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
Register PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters);
if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore)) {
// If VirtReg got split, the eviction info is no longer relevant.
- LastEvicted.clearEvicteeInfo(VirtReg.reg);
+ LastEvicted.clearEvicteeInfo(VirtReg.reg());
return PhysReg;
}
}
@@ -3093,14 +3102,16 @@ Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
Depth);
// Finally spill VirtReg itself.
- if (EnableDeferredSpilling && getStage(VirtReg) < RS_Memory) {
+ if ((EnableDeferredSpilling ||
+ TRI->shouldUseDeferredSpillingForVirtReg(*MF, VirtReg)) &&
+ getStage(VirtReg) < RS_Memory) {
// TODO: This is experimental and in particular, we do not model
// the live range splitting done by spilling correctly.
// We would need a deep integration with the spiller to do the
// right thing here. Anyway, that is still good for early testing.
setStage(VirtReg, RS_Memory);
LLVM_DEBUG(dbgs() << "Do as if this register is in memory\n");
- NewVRegs.push_back(VirtReg.reg);
+ NewVRegs.push_back(VirtReg.reg());
} else {
NamedRegionTimer T("spill", "Spiller", TimerGroupName,
TimerGroupDescription, TimePassesIsEnabled);
@@ -3111,7 +3122,7 @@ Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
// Tell LiveDebugVariables about the new ranges. Ranges not being covered by
// the new regs are kept in LDV (still mapping to the old register), until
// we rewrite spilled locations in LDV at a later stage.
- DebugVars->splitRegister(VirtReg.reg, LRE.regs(), *LIS);
+ DebugVars->splitRegister(VirtReg.reg(), LRE.regs(), *LIS);
if (VerifyEnabled)
MF->verify(this, "After spilling");
@@ -3230,7 +3241,9 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
initializeCSRCost();
- calculateSpillWeightsAndHints(*LIS, mf, VRM, *Loops, *MBFI);
+ VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI);
+
+ VRAI->calculateSpillWeightsAndHints();
LLVM_DEBUG(LIS->dump());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp
index 7590dbf1b977..7c5af1a0c56e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -140,14 +140,13 @@ public:
MachineFunctionProperties::Property::NoPHIs);
}
+ MachineFunctionProperties getClearedProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::IsSSA);
+ }
+
private:
- using LI2NodeMap = std::map<const LiveInterval *, unsigned>;
- using Node2LIMap = std::vector<const LiveInterval *>;
- using AllowedSet = std::vector<unsigned>;
- using AllowedSetMap = std::vector<AllowedSet>;
- using RegPair = std::pair<unsigned, unsigned>;
- using CoalesceMap = std::map<RegPair, PBQP::PBQPNum>;
- using RegSet = std::set<unsigned>;
+ using RegSet = std::set<Register>;
char *customPassID;
@@ -199,7 +198,7 @@ public:
for (auto NId : G.nodeIds()) {
PBQP::PBQPNum SpillCost =
- LIS.getInterval(G.getNodeMetadata(NId).getVReg()).weight;
+ LIS.getInterval(G.getNodeMetadata(NId).getVReg()).weight();
if (SpillCost == 0.0)
SpillCost = std::numeric_limits<PBQP::PBQPNum>::min();
else
@@ -231,9 +230,9 @@ private:
return false;
if (NRegs < MRegs)
- return D.count(IKey(NRegs, MRegs)) > 0;
+ return D.contains(IKey(NRegs, MRegs));
- return D.count(IKey(MRegs, NRegs)) > 0;
+ return D.contains(IKey(MRegs, NRegs));
}
void setDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
@@ -290,7 +289,7 @@ private:
// If two intervals end at the same point, we need a way to break the tie or
// the set will assume they're actually equal and refuse to insert a
// "duplicate". Just compare the vregs - fast and guaranteed unique.
- return std::get<0>(I1)->reg < std::get<0>(I2)->reg;
+ return std::get<0>(I1)->reg() < std::get<0>(I2)->reg();
}
static bool isAtLastSegment(const IntervalInfo &I) {
@@ -331,7 +330,7 @@ public:
// Start by building the inactive set.
for (auto NId : G.nodeIds()) {
- unsigned VReg = G.getNodeMetadata(NId).getVReg();
+ Register VReg = G.getNodeMetadata(NId).getVReg();
LiveInterval &LI = LIS.getInterval(VReg);
assert(!LI.empty() && "PBQP graph contains node for empty interval");
Inactive.push(std::make_tuple(&LI, 0, NId));
@@ -413,9 +412,9 @@ private:
PBQPRAGraph::RawMatrix M(NRegs.size() + 1, MRegs.size() + 1, 0);
bool NodesInterfere = false;
for (unsigned I = 0; I != NRegs.size(); ++I) {
- unsigned PRegN = NRegs[I];
+ MCRegister PRegN = NRegs[I];
for (unsigned J = 0; J != MRegs.size(); ++J) {
- unsigned PRegM = MRegs[J];
+ MCRegister PRegM = MRegs[J];
if (TRI.regsOverlap(PRegN, PRegM)) {
M[I + 1][J + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
NodesInterfere = true;
@@ -448,11 +447,10 @@ public:
if (!CP.setRegisters(&MI) || CP.getSrcReg() == CP.getDstReg())
continue;
- unsigned DstReg = CP.getDstReg();
- unsigned SrcReg = CP.getSrcReg();
+ Register DstReg = CP.getDstReg();
+ Register SrcReg = CP.getSrcReg();
- const float Scale = 1.0f / MBFI.getEntryFreq();
- PBQP::PBQPNum CBenefit = MBFI.getBlockFreq(&MBB).getFrequency() * Scale;
+ PBQP::PBQPNum CBenefit = MBFI.getBlockFreqRelativeToEntryBlock(&MBB);
if (CP.isPhys()) {
if (!MF.getRegInfo().isAllocatable(DstReg))
@@ -464,7 +462,7 @@ public:
G.getNodeMetadata(NId).getAllowedRegs();
unsigned PRegOpt = 0;
- while (PRegOpt < Allowed.size() && Allowed[PRegOpt] != DstReg)
+ while (PRegOpt < Allowed.size() && Allowed[PRegOpt].id() != DstReg)
++PRegOpt;
if (PRegOpt < Allowed.size()) {
@@ -509,9 +507,9 @@ private:
assert(CostMat.getRows() == Allowed1.size() + 1 && "Size mismatch.");
assert(CostMat.getCols() == Allowed2.size() + 1 && "Size mismatch.");
for (unsigned I = 0; I != Allowed1.size(); ++I) {
- unsigned PReg1 = Allowed1[I];
+ MCRegister PReg1 = Allowed1[I];
for (unsigned J = 0; J != Allowed2.size(); ++J) {
- unsigned PReg2 = Allowed2[J];
+ MCRegister PReg2 = Allowed2[J];
if (PReg1 == PReg2)
CostMat[I + 1][J + 1] -= Benefit;
}
@@ -519,6 +517,20 @@ private:
}
};
+/// PBQP-specific implementation of weight normalization.
+class PBQPVirtRegAuxInfo final : public VirtRegAuxInfo {
+ float normalize(float UseDefFreq, unsigned Size, unsigned NumInstr) override {
+ // All intervals have a spill weight that is mostly proportional to the
+ // number of uses, with uses in loops having a bigger weight.
+ return NumInstr * VirtRegAuxInfo::normalize(UseDefFreq, Size, 1);
+ }
+
+public:
+ PBQPVirtRegAuxInfo(MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM,
+ const MachineLoopInfo &Loops,
+ const MachineBlockFrequencyInfo &MBFI)
+ : VirtRegAuxInfo(MF, LIS, VRM, Loops, MBFI) {}
+};
} // end anonymous namespace
// Out-of-line destructor/anchor for PBQPRAConstraint.
@@ -558,18 +570,19 @@ void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction &MF,
// Iterate over all live ranges.
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
- unsigned Reg = Register::index2VirtReg(I);
+ Register Reg = Register::index2VirtReg(I);
if (MRI.reg_nodbg_empty(Reg))
continue;
VRegsToAlloc.insert(Reg);
}
}
-static bool isACalleeSavedRegister(unsigned reg, const TargetRegisterInfo &TRI,
+static bool isACalleeSavedRegister(MCRegister Reg,
+ const TargetRegisterInfo &TRI,
const MachineFunction &MF) {
const MCPhysReg *CSR = MF.getRegInfo().getCalleeSavedRegs();
for (unsigned i = 0; CSR[i] != 0; ++i)
- if (TRI.regsOverlap(reg, CSR[i]))
+ if (TRI.regsOverlap(Reg, CSR[i]))
return true;
return false;
}
@@ -583,12 +596,12 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM,
const TargetRegisterInfo &TRI =
*G.getMetadata().MF.getSubtarget().getRegisterInfo();
- std::vector<unsigned> Worklist(VRegsToAlloc.begin(), VRegsToAlloc.end());
+ std::vector<Register> Worklist(VRegsToAlloc.begin(), VRegsToAlloc.end());
- std::map<unsigned, std::vector<unsigned>> VRegAllowedMap;
+ std::map<Register, std::vector<MCRegister>> VRegAllowedMap;
while (!Worklist.empty()) {
- unsigned VReg = Worklist.back();
+ Register VReg = Worklist.back();
Worklist.pop_back();
LiveInterval &VRegLI = LIS.getInterval(VReg);
@@ -596,8 +609,8 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM,
    // If this is an empty interval, move it to the EmptyIntervalVRegs set,
// continue.
if (VRegLI.empty()) {
- EmptyIntervalVRegs.insert(VRegLI.reg);
- VRegsToAlloc.erase(VRegLI.reg);
+ EmptyIntervalVRegs.insert(VRegLI.reg());
+ VRegsToAlloc.erase(VRegLI.reg());
continue;
}
@@ -608,10 +621,10 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM,
LIS.checkRegMaskInterference(VRegLI, RegMaskOverlaps);
// Compute an initial allowed set for the current vreg.
- std::vector<unsigned> VRegAllowed;
+ std::vector<MCRegister> VRegAllowed;
ArrayRef<MCPhysReg> RawPRegOrder = TRC->getRawAllocationOrder(MF);
for (unsigned I = 0; I != RawPRegOrder.size(); ++I) {
- unsigned PReg = RawPRegOrder[I];
+ MCRegister PReg(RawPRegOrder[I]);
if (MRI.isReserved(PReg))
continue;
@@ -639,10 +652,11 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM,
if (VRegAllowed.empty()) {
SmallVector<Register, 8> NewVRegs;
spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);
- Worklist.insert(Worklist.end(), NewVRegs.begin(), NewVRegs.end());
+ llvm::append_range(Worklist, NewVRegs);
continue;
- } else
- VRegAllowedMap[VReg] = std::move(VRegAllowed);
+ }
+
+ VRegAllowedMap[VReg.id()] = std::move(VRegAllowed);
}
for (auto &KV : VRegAllowedMap) {
@@ -685,7 +699,7 @@ void RegAllocPBQP::spillVReg(Register VReg,
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
(void)TRI;
LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> SPILLED (Cost: "
- << LRE.getParent().weight << ", New vregs: ");
+ << LRE.getParent().weight() << ", New vregs: ");
// Copy any newly inserted live intervals into the list of regs to
// allocate.
@@ -693,8 +707,8 @@ void RegAllocPBQP::spillVReg(Register VReg,
I != E; ++I) {
const LiveInterval &LI = LIS.getInterval(*I);
assert(!LI.empty() && "Empty spill range.");
- LLVM_DEBUG(dbgs() << printReg(LI.reg, &TRI) << " ");
- VRegsToAlloc.insert(LI.reg);
+ LLVM_DEBUG(dbgs() << printReg(LI.reg(), &TRI) << " ");
+ VRegsToAlloc.insert(LI.reg());
}
LLVM_DEBUG(dbgs() << ")\n");
@@ -718,11 +732,11 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G,
// Iterate over the nodes mapping the PBQP solution to a register
// assignment.
for (auto NId : G.nodeIds()) {
- unsigned VReg = G.getNodeMetadata(NId).getVReg();
- unsigned AllocOption = Solution.getSelection(NId);
+ Register VReg = G.getNodeMetadata(NId).getVReg();
+ unsigned AllocOpt = Solution.getSelection(NId);
- if (AllocOption != PBQP::RegAlloc::getSpillOptionIdx()) {
- unsigned PReg = G.getNodeMetadata(NId).getAllowedRegs()[AllocOption - 1];
+ if (AllocOpt != PBQP::RegAlloc::getSpillOptionIdx()) {
+ MCRegister PReg = G.getNodeMetadata(NId).getAllowedRegs()[AllocOpt - 1];
LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> "
<< TRI.getName(PReg) << "\n");
assert(PReg != 0 && "Invalid preg selected.");
@@ -750,12 +764,12 @@ void RegAllocPBQP::finalizeAlloc(MachineFunction &MF,
I != E; ++I) {
LiveInterval &LI = LIS.getInterval(*I);
- unsigned PReg = MRI.getSimpleHint(LI.reg);
+ Register PReg = MRI.getSimpleHint(LI.reg());
if (PReg == 0) {
- const TargetRegisterClass &RC = *MRI.getRegClass(LI.reg);
+ const TargetRegisterClass &RC = *MRI.getRegClass(LI.reg());
const ArrayRef<MCPhysReg> RawPRegOrder = RC.getRawAllocationOrder(MF);
- for (unsigned CandidateReg : RawPRegOrder) {
+ for (MCRegister CandidateReg : RawPRegOrder) {
if (!VRM.getRegInfo().isReserved(CandidateReg)) {
PReg = CandidateReg;
break;
@@ -765,7 +779,7 @@ void RegAllocPBQP::finalizeAlloc(MachineFunction &MF,
"No un-reserved physical registers in this register class");
}
- VRM.assignVirt2Phys(LI.reg, PReg);
+ VRM.assignVirt2Phys(LI.reg(), PReg);
}
}
@@ -779,13 +793,6 @@ void RegAllocPBQP::postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS) {
DeadRemats.clear();
}
-static inline float normalizePBQPSpillWeight(float UseDefFreq, unsigned Size,
- unsigned NumInstr) {
- // All intervals have a spill weight that is mostly proportional to the number
- // of uses, with uses in loops having a bigger weight.
- return NumInstr * normalizeSpillWeight(UseDefFreq, Size, 1);
-}
-
bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
LiveIntervals &LIS = getAnalysis<LiveIntervals>();
MachineBlockFrequencyInfo &MBFI =
@@ -793,8 +800,8 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
VirtRegMap &VRM = getAnalysis<VirtRegMap>();
- calculateSpillWeightsAndHints(LIS, MF, &VRM, getAnalysis<MachineLoopInfo>(),
- MBFI, normalizePBQPSpillWeight);
+ PBQPVirtRegAuxInfo VRAI(MF, LIS, VRM, getAnalysis<MachineLoopInfo>(), MBFI);
+ VRAI.calculateSpillWeightsAndHints();
std::unique_ptr<Spiller> VRegSpiller(createInlineSpiller(*this, MF, VRM));
@@ -878,7 +885,7 @@ static Printable PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId,
return Printable([NId, &G](raw_ostream &OS) {
const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo();
const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
- unsigned VReg = G.getNodeMetadata(NId).getVReg();
+ Register VReg = G.getNodeMetadata(NId).getVReg();
const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg));
OS << NId << " (" << RegClassName << ':' << printReg(VReg, TRI) << ')';
});
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp
index 1523bd4d1649..0488db3d09cb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp
@@ -188,7 +188,14 @@ unsigned RegisterClassInfo::computePSetLimit(unsigned Idx) const {
}
assert(RC && "Failed to find register class");
compute(RC);
- unsigned NReserved = RC->getNumRegs() - getNumAllocatableRegs(RC);
- return TRI->getRegPressureSetLimit(*MF, Idx) -
- TRI->getRegClassWeight(RC).RegWeight * NReserved;
+ unsigned NAllocatableRegs = getNumAllocatableRegs(RC);
+ unsigned RegPressureSetLimit = TRI->getRegPressureSetLimit(*MF, Idx);
+ // If all the regs are reserved, return raw RegPressureSetLimit.
+ // One example is VRSAVERC in PowerPC.
+ // Avoid returning zero, as getRegPressureSetLimit(Idx) assumes that
+ // computePSetLimit returns a non-zero value.
+ if (NAllocatableRegs == 0)
+ return RegPressureSetLimit;
+ unsigned NReserved = RC->getNumRegs() - NAllocatableRegs;
+ return RegPressureSetLimit - TRI->getRegClassWeight(RC).RegWeight * NReserved;
}
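
A worked example of the new guard, with invented numbers: take a pressure set whose raw limit is 32, backed by a register class of 32 registers with RegWeight 1.

    Fully reserved class (e.g. VRSAVERC): NAllocatableRegs = 0
      old: 32 - 1 * (32 - 0) = 0   // zero limit trips getRegPressureSetLimit
      new: early return, limit = 32

    Partially reserved class: NAllocatableRegs = 28
      both: 32 - 1 * (32 - 28) = 28 // unchanged by this patch
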
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 17160a9f42cd..7fdc85a6e444 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -137,13 +137,13 @@ namespace {
/// ordered-by-slot-index set of DBG_VALUEs, to help quick
/// identification of whether coalescing may change location validity.
using DbgValueLoc = std::pair<SlotIndex, MachineInstr*>;
- DenseMap<unsigned, std::vector<DbgValueLoc>> DbgVRegToValues;
+ DenseMap<Register, std::vector<DbgValueLoc>> DbgVRegToValues;
/// VRegs may be repeatedly coalesced, and have many DBG_VALUEs attached.
/// To avoid repeatedly merging sets of DbgValueLocs, instead record
/// which vregs have been coalesced, and where to. This map is from
/// vreg => {set of vregs merged in}.
- DenseMap<unsigned, SmallVector<unsigned, 4>> DbgMergedVRegNums;
+ DenseMap<Register, SmallVector<Register, 4>> DbgMergedVRegNums;
/// A LaneMask to remember on which subregister live ranges we need to call
/// shrinkToUses() later.
@@ -173,16 +173,16 @@ namespace {
SmallVector<MachineInstr*, 8> DeadDefs;
/// Virtual registers to be considered for register class inflation.
- SmallVector<unsigned, 8> InflateRegs;
+ SmallVector<Register, 8> InflateRegs;
/// The collection of live intervals which should have been updated
/// immediately after rematerialization but delayed until
/// lateLiveIntervalUpdate is called.
- DenseSet<unsigned> ToBeUpdated;
+ DenseSet<Register> ToBeUpdated;
/// Record how many times the large live interval with many valnos
/// has been tried to join with other live interval.
- DenseMap<unsigned, unsigned long> LargeLIVisitCounter;
+ DenseMap<Register, unsigned long> LargeLIVisitCounter;
/// Recursively eliminate dead defs in DeadDefs.
void eliminateDeadDefs();
@@ -211,6 +211,18 @@ namespace {
/// live interval update is costly.
void lateLiveIntervalUpdate();
+ /// Check if the incoming value defined by a COPY at \p SLRQ in the subrange
+ /// has no value defined in the predecessors. If the incoming value is the
+ /// same as defined by the copy itself, the value is considered undefined.
+ bool copyValueUndefInPredecessors(LiveRange &S,
+ const MachineBasicBlock *MBB,
+ LiveQueryResult SLRQ);
+
+ /// Set necessary undef flags on subregister uses after pruning out undef
+ /// lane segments from the subrange.
+ void setUndefOnPrunedSubRegUses(LiveInterval &LI, Register Reg,
+ LaneBitmask PrunedLanes);
+
/// Attempt to join intervals corresponding to SrcReg/DstReg, which are the
/// src/dst of the copy instruction CopyMI. This returns true if the copy
/// was successfully coalesced away. If it is not currently possible to
@@ -285,7 +297,7 @@ namespace {
/// number if it is not zero. If DstReg is a physical register and the
/// existing subregister number of the def / use being updated is not zero,
/// make sure to set it to the correct physical subregister.
- void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);
+ void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx);
/// If the given machine operand reads only undefined lanes add an undef
/// flag.
@@ -351,7 +363,7 @@ namespace {
JoinVals &LHSVals, LiveRange &RHS,
JoinVals &RHSVals);
- void checkMergingChangesDbgValuesImpl(unsigned Reg, LiveRange &OtherRange,
+ void checkMergingChangesDbgValuesImpl(Register Reg, LiveRange &OtherRange,
LiveRange &RegRange, JoinVals &Vals2);
public:
@@ -388,8 +400,8 @@ INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
"Simple Register Coalescing", false, false)
LLVM_NODISCARD static bool isMoveInstr(const TargetRegisterInfo &tri,
- const MachineInstr *MI, unsigned &Src,
- unsigned &Dst, unsigned &SrcSub,
+ const MachineInstr *MI, Register &Src,
+ Register &Dst, unsigned &SrcSub,
unsigned &DstSub) {
if (MI->isCopy()) {
Dst = MI->getOperand(0).getReg();
@@ -424,12 +436,13 @@ static bool isSplitEdge(const MachineBasicBlock *MBB) {
}
bool CoalescerPair::setRegisters(const MachineInstr *MI) {
- SrcReg = DstReg = 0;
+ SrcReg = DstReg = Register();
SrcIdx = DstIdx = 0;
NewRC = nullptr;
Flipped = CrossClass = false;
- unsigned Src, Dst, SrcSub, DstSub;
+ Register Src, Dst;
+ unsigned SrcSub = 0, DstSub = 0;
if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
return false;
Partial = SrcSub || DstSub;
@@ -523,7 +536,8 @@ bool CoalescerPair::flip() {
bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
if (!MI)
return false;
- unsigned Src, Dst, SrcSub, DstSub;
+ Register Src, Dst;
+ unsigned SrcSub = 0, DstSub = 0;
if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
return false;
@@ -536,8 +550,8 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
}
// Now check that Dst matches DstReg.
- if (Register::isPhysicalRegister(DstReg)) {
- if (!Register::isPhysicalRegister(Dst))
+ if (DstReg.isPhysical()) {
+ if (!Dst.isPhysical())
return false;
assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state.");
// DstSub could be set for a physreg from INSERT_SUBREG.
@@ -547,7 +561,7 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
if (!SrcSub)
return DstReg == Dst;
// This is a partial register copy. Check that the parts match.
- return TRI.getSubReg(DstReg, SrcSub) == Dst;
+ return Register(TRI.getSubReg(DstReg, SrcSub)) == Dst;
} else {
// DstReg is virtual.
if (DstReg != Dst)
@@ -649,7 +663,7 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
// in IntB, we can merge them.
if (ValS+1 != BS) return false;
- LLVM_DEBUG(dbgs() << "Extending: " << printReg(IntB.reg, TRI));
+ LLVM_DEBUG(dbgs() << "Extending: " << printReg(IntB.reg(), TRI));
SlotIndex FillerStart = ValS->end, FillerEnd = BS->start;
// We are about to delete CopyMI, so need to remove it as the 'instruction
@@ -692,13 +706,13 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
// If the source instruction was killing the source register before the
// merge, unset the isKill marker given the live range has been extended.
- int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg, true);
+ int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg(), true);
if (UIdx != -1) {
ValSEndInst->getOperand(UIdx).setIsKill(false);
}
// Rewrite the copy.
- CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI);
+ CopyMI->substituteRegister(IntA.reg(), IntB.reg(), 0, *TRI);
// If the copy instruction was killing the destination register or any
// subrange before the merge trim the live range.
bool RecomputeLiveRange = AS->end == CopyIdx;
@@ -817,7 +831,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
return { false, false };
// If DefMI is a two-address instruction then commuting it will change the
// destination register.
- int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg);
+ int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg());
assert(DefIdx != -1);
unsigned UseOpIdx;
if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
@@ -838,7 +852,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
Register NewReg = NewDstMO.getReg();
- if (NewReg != IntB.reg || !IntB.Query(AValNo->def).isKill())
+ if (NewReg != IntB.reg() || !IntB.Query(AValNo->def).isKill())
return { false, false };
// Make sure there are no other definitions of IntB that would reach the
@@ -848,7 +862,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
  // If some of the uses of IntA.reg are already coalesced away, return false.
// It's not possible to determine whether it's safe to perform the coalescing.
- for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg)) {
+ for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg())) {
MachineInstr *UseMI = MO.getParent();
unsigned OpNo = &MO - &UseMI->getOperand(0);
SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI);
@@ -870,9 +884,9 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx);
if (!NewMI)
return { false, false };
- if (Register::isVirtualRegister(IntA.reg) &&
- Register::isVirtualRegister(IntB.reg) &&
- !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg)))
+ if (Register::isVirtualRegister(IntA.reg()) &&
+ Register::isVirtualRegister(IntB.reg()) &&
+ !MRI->constrainRegClass(IntB.reg(), MRI->getRegClass(IntA.reg())))
return { false, false };
if (NewMI != DefMI) {
LIS->ReplaceMachineInstrInMaps(*DefMI, *NewMI);
@@ -891,9 +905,10 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
// = B
// Update uses of IntA of the specific Val# with IntB.
- for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg),
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg()),
UE = MRI->use_end();
- UI != UE; /* ++UI is below because of possible MI removal */) {
+ UI != UE;
+ /* ++UI is below because of possible MI removal */) {
MachineOperand &UseMO = *UI;
++UI;
if (UseMO.isUndef())
@@ -920,7 +935,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
continue;
if (!UseMI->isCopy())
continue;
- if (UseMI->getOperand(0).getReg() != IntB.reg ||
+ if (UseMI->getOperand(0).getReg() != IntB.reg() ||
UseMI->getOperand(0).getSubReg())
continue;
@@ -951,10 +966,10 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
if (IntA.hasSubRanges() || IntB.hasSubRanges()) {
if (!IntA.hasSubRanges()) {
- LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg);
+ LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg());
IntA.createSubRangeFrom(Allocator, Mask, IntA);
} else if (!IntB.hasSubRanges()) {
- LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntB.reg);
+ LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntB.reg());
IntB.createSubRangeFrom(Allocator, Mask, IntB);
}
SlotIndex AIdx = CopyIdx.getRegSlot(true);
@@ -1100,8 +1115,8 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
continue;
}
// Check DefMI is a reverse copy and it is in BB Pred.
- if (DefMI->getOperand(0).getReg() != IntA.reg ||
- DefMI->getOperand(1).getReg() != IntB.reg ||
+ if (DefMI->getOperand(0).getReg() != IntA.reg() ||
+ DefMI->getOperand(1).getReg() != IntB.reg() ||
DefMI->getParent() != Pred) {
CopyLeftBB = Pred;
continue;
@@ -1158,8 +1173,8 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
// Insert new copy to CopyLeftBB.
MachineInstr *NewCopyMI = BuildMI(*CopyLeftBB, InsPos, CopyMI.getDebugLoc(),
- TII->get(TargetOpcode::COPY), IntB.reg)
- .addReg(IntA.reg);
+ TII->get(TargetOpcode::COPY), IntB.reg())
+ .addReg(IntA.reg());
SlotIndex NewCopyIdx =
LIS->InsertMachineInstrInMaps(*NewCopyMI).getRegSlot();
IntB.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
@@ -1212,7 +1227,10 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
}
++I;
}
- LIS->extendToIndices(SR, EndPoints);
+ SmallVector<SlotIndex, 8> Undefs;
+ IntB.computeSubRangeUndefs(Undefs, SR.LaneMask, *MRI,
+ *LIS->getSlotIndexes());
+ LIS->extendToIndices(SR, EndPoints, Undefs);
}
// If any dead defs were extended, truncate them.
shrinkToUses(&IntB);
@@ -1224,9 +1242,9 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
/// Returns true if @p MI defines the full vreg @p Reg, as opposed to just
/// defining a subregister.
-static bool definesFullReg(const MachineInstr &MI, unsigned Reg) {
- assert(!Register::isPhysicalRegister(Reg) &&
- "This code cannot handle physreg aliasing");
+static bool definesFullReg(const MachineInstr &MI, Register Reg) {
+ assert(!Reg.isPhysical() && "This code cannot handle physreg aliasing");
+
for (const MachineOperand &Op : MI.operands()) {
if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg)
continue;
@@ -1242,9 +1260,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
MachineInstr *CopyMI,
bool &IsDefCopy) {
IsDefCopy = false;
- unsigned SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg();
+ Register SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg();
unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx() : CP.getSrcIdx();
- unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
+ Register DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
unsigned DstIdx = CP.isFlipped() ? CP.getSrcIdx() : CP.getDstIdx();
if (Register::isPhysicalRegister(SrcReg))
return false;
@@ -1291,8 +1309,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF);
if (!DefMI->isImplicitDef()) {
- if (Register::isPhysicalRegister(DstReg)) {
- unsigned NewDstReg = DstReg;
+ if (DstReg.isPhysical()) {
+ Register NewDstReg = DstReg;
unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(),
DefMI->getOperand(0).getSubReg());
@@ -1366,7 +1384,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
// NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86).
// We need to remember these so we can add intervals once we insert
// NewMI into SlotIndexes.
- SmallVector<unsigned, 4> NewMIImplDefs;
+ SmallVector<MCRegister, 4> NewMIImplDefs;
for (unsigned i = NewMI.getDesc().getNumOperands(),
e = NewMI.getNumOperands();
i != e; ++i) {
@@ -1374,11 +1392,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
if (MO.isReg() && MO.isDef()) {
assert(MO.isImplicit() && MO.isDead() &&
Register::isPhysicalRegister(MO.getReg()));
- NewMIImplDefs.push_back(MO.getReg());
+ NewMIImplDefs.push_back(MO.getReg().asMCReg());
}
}
- if (Register::isVirtualRegister(DstReg)) {
+ if (DstReg.isVirtual()) {
unsigned NewIdx = NewMI.getOperand(0).getSubReg();
if (DefRC != nullptr) {
@@ -1513,7 +1531,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) {
- unsigned Reg = NewMIImplDefs[i];
+ MCRegister Reg = NewMIImplDefs[i];
for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
if (LiveRange *LR = LIS->getCachedRegUnit(*Units))
LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
@@ -1571,7 +1589,8 @@ MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
// Note that we do not query CoalescerPair here but redo isMoveInstr as the
// CoalescerPair may have a new register class with adjusted subreg indices
// at this point.
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ Register SrcReg, DstReg;
+ unsigned SrcSubIdx = 0, DstSubIdx = 0;
 if (!isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
return nullptr;
@@ -1696,7 +1715,7 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
}
}
-void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, unsigned DstReg,
+void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
unsigned SubIdx) {
bool DstIsPhys = Register::isPhysicalRegister(DstReg);
LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
@@ -1752,7 +1771,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, unsigned DstReg,
if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) {
if (!DstInt->hasSubRanges()) {
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
- LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstInt->reg);
+ LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstInt->reg());
LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
LaneBitmask UnusedLanes = FullMask & ~UsedLanes;
DstInt->createSubRangeFrom(Allocator, UsedLanes, *DstInt);
@@ -1802,6 +1821,49 @@ bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
return false;
}
+bool RegisterCoalescer::copyValueUndefInPredecessors(
+ LiveRange &S, const MachineBasicBlock *MBB, LiveQueryResult SLRQ) {
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+ SlotIndex PredEnd = LIS->getMBBEndIdx(Pred);
+ if (VNInfo *V = S.getVNInfoAt(PredEnd.getPrevSlot())) {
+ // If this is a self loop, we may be reading the same value.
+ if (V->id != SLRQ.valueOutOrDead()->id)
+ return false;
+ }
+ }
+
+ return true;
+}
+
+void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI,
+ Register Reg,
+ LaneBitmask PrunedLanes) {
+ // If we had other instructions in the segment reading the undef sublane
+ // value, we need to mark them with undef.
+ for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
+ unsigned SubRegIdx = MO.getSubReg();
+ if (SubRegIdx == 0 || MO.isUndef())
+ continue;
+
+ LaneBitmask SubRegMask = TRI->getSubRegIndexLaneMask(SubRegIdx);
+ SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent());
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ if (!S.liveAt(Pos) && (PrunedLanes & SubRegMask).any()) {
+ MO.setIsUndef();
+ break;
+ }
+ }
+ }
+
+ LI.removeEmptySubRanges();
+
+ // A def of a subregister may be a use of other register lanes. Replacing
+ // such a def with a def of a different register will eliminate the use,
+ // and may cause the recorded live range to be larger than the actual
+ // liveness in the program IR.
+ LIS->shrinkToUses(&LI);
+}
+
bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
Again = false;
LLVM_DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI);
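Context for the two helpers added above: the predecessor check reduces to a single liveness query per incoming edge. A minimal sketch, assuming the standard LiveIntervals/SlotIndexes API; valueOutOfBlock is a hypothetical name, not part of this patch:

    #include "llvm/CodeGen/LiveIntervals.h"

    // The value live out of MBB is the value at the slot just before the
    // block's end index; a null result means the range is dead there.
    static const llvm::VNInfo *valueOutOfBlock(const llvm::LiveIntervals &LIS,
                                               const llvm::LiveRange &LR,
                                               const llvm::MachineBasicBlock *MBB) {
      llvm::SlotIndex End = LIS.getMBBEndIdx(MBB);
      return LR.getVNInfoAt(End.getPrevSlot());
    }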
@@ -1861,16 +1923,35 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
VNInfo *ReadVNI = LRQ.valueIn();
assert(ReadVNI && "No value before copy and no <undef> flag.");
assert(ReadVNI != DefVNI && "Cannot read and define the same value.");
- LI.MergeValueNumberInto(DefVNI, ReadVNI);
+
+ // Track incoming undef lanes we need to eliminate from the subrange.
+ LaneBitmask PrunedLanes;
+ MachineBasicBlock *MBB = CopyMI->getParent();
// Process subregister liveranges.
for (LiveInterval::SubRange &S : LI.subranges()) {
LiveQueryResult SLRQ = S.Query(CopyIdx);
if (VNInfo *SDefVNI = SLRQ.valueDefined()) {
- VNInfo *SReadVNI = SLRQ.valueIn();
- S.MergeValueNumberInto(SDefVNI, SReadVNI);
+ if (VNInfo *SReadVNI = SLRQ.valueIn())
+ SDefVNI = S.MergeValueNumberInto(SDefVNI, SReadVNI);
+
+ // If this copy introduced an undef subrange from an incoming value,
+ // we need to eliminate the undef live-in values from the subrange.
+ if (copyValueUndefInPredecessors(S, MBB, SLRQ)) {
+ LLVM_DEBUG(dbgs() << "Incoming sublane value is undef at copy\n");
+ PrunedLanes |= S.LaneMask;
+ S.removeValNo(SDefVNI);
+ }
}
}
+
+ LI.MergeValueNumberInto(DefVNI, ReadVNI);
+ if (PrunedLanes.any()) {
+ LLVM_DEBUG(dbgs() << "Pruning undef incoming lanes: "
+ << PrunedLanes << '\n');
+ setUndefOnPrunedSubRegUses(LI, CP.getSrcReg(), PrunedLanes);
+ }
+
LLVM_DEBUG(dbgs() << "\tMerged values: " << LI << '\n');
}
deleteInstr(CopyMI);
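The hunk above batches the subrange pruning: undef lanes are accumulated across all subranges, then the use fixup runs once. A hedged restatement of that pattern, where shouldPrune and fixupUses are placeholders for the predecessor check and the helper added earlier:

    llvm::LaneBitmask Pruned;   // starts as LaneBitmask::getNone()
    for (llvm::LiveInterval::SubRange &S : LI.subranges())
      if (shouldPrune(S))       // placeholder for the predecessor-undef check
        Pruned |= S.LaneMask;
    if (Pruned.any())
      fixupUses(LI, Pruned);    // one pass over the uses, as in the helper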
@@ -1885,7 +1966,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
if (!canJoinPhys(CP)) {
 // Before giving up on coalescing, if the source is defined by a trivial
 // computation, try rematerializing it.
- bool IsDefCopy;
+ bool IsDefCopy = false;
if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
return true;
if (IsDefCopy)
@@ -1924,7 +2005,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
 // If the source is defined by a trivial computation, try
 // rematerializing it.
- bool IsDefCopy;
+ bool IsDefCopy = false;
if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
return true;
@@ -1938,7 +2019,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
if (Changed) {
deleteInstr(CopyMI);
if (Shrink) {
- unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
+ Register DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
LiveInterval &DstLI = LIS->getInterval(DstReg);
shrinkToUses(&DstLI);
LLVM_DEBUG(dbgs() << "\t\tshrunk: " << DstLI << '\n');
@@ -1991,7 +2072,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
continue;
LLVM_DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask)
<< ")\n");
- LIS->shrinkToUses(S, LI.reg);
+ LIS->shrinkToUses(S, LI.reg());
}
LI.removeEmptySubRanges();
}
@@ -2030,8 +2111,8 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
}
bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
- unsigned DstReg = CP.getDstReg();
- unsigned SrcReg = CP.getSrcReg();
+ Register DstReg = CP.getDstReg();
+ Register SrcReg = CP.getSrcReg();
assert(CP.isPhys() && "Must be a physreg copy");
assert(MRI->isReserved(DstReg) && "Not a reserved register");
LiveInterval &RHS = LIS->getInterval(SrcReg);
@@ -2128,7 +2209,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
LLVM_DEBUG(dbgs() << "\t\tRemoving phys reg def of "
<< printReg(DstReg, TRI) << " at " << CopyRegIdx << "\n");
- LIS->removePhysRegDefAt(DstReg, CopyRegIdx);
+ LIS->removePhysRegDefAt(DstReg.asMCReg(), CopyRegIdx);
// Create a new dead def at the new def location.
for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) {
LiveRange &LR = LIS->getRegUnit(*UI);
@@ -2219,7 +2300,7 @@ class JoinVals {
LiveRange &LR;
/// (Main) register we work on.
- const unsigned Reg;
+ const Register Reg;
/// Reg (and therefore the values in this liverange) will end up as
/// subregister SubIdx in the coalesced register. Either CP.DstIdx or
@@ -2339,7 +2420,7 @@ class JoinVals {
LaneBitmask computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const;
/// Find the ultimate value that VNI was copied from.
- std::pair<const VNInfo*,unsigned> followCopyChain(const VNInfo *VNI) const;
+ std::pair<const VNInfo *, Register> followCopyChain(const VNInfo *VNI) const;
bool valuesIdentical(VNInfo *Value0, VNInfo *Value1, const JoinVals &Other) const;
@@ -2378,7 +2459,7 @@ class JoinVals {
/// Return true if MI uses any of the given Lanes from Reg.
/// This does not include partial redefinitions of Reg.
- bool usesLanes(const MachineInstr &MI, unsigned, unsigned, LaneBitmask) const;
+ bool usesLanes(const MachineInstr &MI, Register, unsigned, LaneBitmask) const;
/// Determine if ValNo is a copy of a value number in LR or Other.LR that will
/// be pruned:
@@ -2389,14 +2470,15 @@ class JoinVals {
bool isPrunedValue(unsigned ValNo, JoinVals &Other);
public:
- JoinVals(LiveRange &LR, unsigned Reg, unsigned SubIdx, LaneBitmask LaneMask,
- SmallVectorImpl<VNInfo*> &newVNInfo, const CoalescerPair &cp,
+ JoinVals(LiveRange &LR, Register Reg, unsigned SubIdx, LaneBitmask LaneMask,
+ SmallVectorImpl<VNInfo *> &newVNInfo, const CoalescerPair &cp,
LiveIntervals *lis, const TargetRegisterInfo *TRI, bool SubRangeJoin,
bool TrackSubRegLiveness)
- : LR(LR), Reg(Reg), SubIdx(SubIdx), LaneMask(LaneMask),
- SubRangeJoin(SubRangeJoin), TrackSubRegLiveness(TrackSubRegLiveness),
- NewVNInfo(newVNInfo), CP(cp), LIS(lis), Indexes(LIS->getSlotIndexes()),
- TRI(TRI), Assignments(LR.getNumValNums(), -1), Vals(LR.getNumValNums()) {}
+ : LR(LR), Reg(Reg), SubIdx(SubIdx), LaneMask(LaneMask),
+ SubRangeJoin(SubRangeJoin), TrackSubRegLiveness(TrackSubRegLiveness),
+ NewVNInfo(newVNInfo), CP(cp), LIS(lis), Indexes(LIS->getSlotIndexes()),
+ TRI(TRI), Assignments(LR.getNumValNums(), -1),
+ Vals(LR.getNumValNums()) {}
/// Analyze defs in LR and compute a value mapping in NewVNInfo.
/// Returns false if any conflicts were impossible to resolve.
@@ -2462,9 +2544,9 @@ LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
return L;
}
-std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(
- const VNInfo *VNI) const {
- unsigned TrackReg = Reg;
+std::pair<const VNInfo *, Register>
+JoinVals::followCopyChain(const VNInfo *VNI) const {
+ Register TrackReg = Reg;
while (!VNI->isPHIDef()) {
SlotIndex Def = VNI->def;
@@ -2473,7 +2555,7 @@ std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(
if (!MI->isFullCopy())
return std::make_pair(VNI, TrackReg);
Register SrcReg = MI->getOperand(1).getReg();
- if (!Register::isVirtualRegister(SrcReg))
+ if (!SrcReg.isVirtual())
return std::make_pair(VNI, TrackReg);
const LiveInterval &LI = LIS->getInterval(SrcReg);
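The function reshaped here walks a chain of full copies back toward the original definition. A simplified, hedged sketch of one step of that walk, with VNI, TrackReg, and LIS as in the surrounding code (the real function also handles subranges):

    llvm::SlotIndex Def = VNI->def;
    if (const llvm::MachineInstr *MI = LIS->getInstructionFromIndex(Def))
      if (MI->isFullCopy()) {
        llvm::Register Src = MI->getOperand(1).getReg();
        if (Src.isVirtual()) {
          TrackReg = Src;
          // The value the copy read is the value live in at its slot.
          VNI = LIS->getInterval(Src).Query(Def).valueIn();
        }
      }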
@@ -2518,13 +2600,13 @@ std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(
bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1,
const JoinVals &Other) const {
const VNInfo *Orig0;
- unsigned Reg0;
+ Register Reg0;
std::tie(Orig0, Reg0) = followCopyChain(Value0);
if (Orig0 == Value1 && Reg0 == Other.Reg)
return true;
const VNInfo *Orig1;
- unsigned Reg1;
+ Register Reg1;
std::tie(Orig1, Reg1) = Other.followCopyChain(Value1);
// If both values are undefined, and the source registers are the same
// register, the values are identical. Filter out cases where only one
@@ -2685,14 +2767,8 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
return CR_Replace;
// Check for simple erasable conflicts.
- if (DefMI->isImplicitDef()) {
- // We need the def for the subregister if there is nothing else live at the
- // subrange at this point.
- if (TrackSubRegLiveness
- && (V.WriteLanes & (OtherV.ValidLanes | OtherV.WriteLanes)).none())
- return CR_Replace;
+ if (DefMI->isImplicitDef())
return CR_Erase;
- }
// Include the non-conflict where DefMI is a coalescable copy that kills
// OtherVNI. We still want the copy erased and value numbers merged.
@@ -2881,7 +2957,7 @@ taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
return true;
}
-bool JoinVals::usesLanes(const MachineInstr &MI, unsigned Reg, unsigned SubIdx,
+bool JoinVals::usesLanes(const MachineInstr &MI, Register Reg, unsigned SubIdx,
LaneBitmask Lanes) const {
if (MI.isDebugInstr())
return false;
@@ -3353,7 +3429,7 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
bool RegisterCoalescer::isHighCostLiveInterval(LiveInterval &LI) {
if (LI.valnos.size() < LargeIntervalSizeThreshold)
return false;
- auto &Counter = LargeLIVisitCounter[LI.reg];
+ auto &Counter = LargeLIVisitCounter[LI.reg()];
if (Counter < LargeIntervalFreqThreshold) {
Counter++;
return false;
@@ -3456,8 +3532,8 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
// Kill flags are going to be wrong if the live ranges were overlapping.
// Eventually, we should simply clear all kill flags when computing live
// ranges. They are reinserted after register allocation.
- MRI->clearKillFlags(LHS.reg);
- MRI->clearKillFlags(RHS.reg);
+ MRI->clearKillFlags(LHS.reg());
+ MRI->clearKillFlags(RHS.reg());
if (!EndPoints.empty()) {
// Recompute the parts of the live range we had to remove because of
@@ -3525,20 +3601,20 @@ void RegisterCoalescer::checkMergingChangesDbgValues(CoalescerPair &CP,
JoinVals &LHSVals,
LiveRange &RHS,
JoinVals &RHSVals) {
- auto ScanForDstReg = [&](unsigned Reg) {
+ auto ScanForDstReg = [&](Register Reg) {
checkMergingChangesDbgValuesImpl(Reg, RHS, LHS, LHSVals);
};
- auto ScanForSrcReg = [&](unsigned Reg) {
+ auto ScanForSrcReg = [&](Register Reg) {
checkMergingChangesDbgValuesImpl(Reg, LHS, RHS, RHSVals);
};
 // Scan for potentially unsound DBG_VALUEs: first examine the register number
 // Reg, and then any other vregs that may have been merged into it.
- auto PerformScan = [this](unsigned Reg, std::function<void(unsigned)> Func) {
+ auto PerformScan = [this](Register Reg, std::function<void(Register)> Func) {
Func(Reg);
if (DbgMergedVRegNums.count(Reg))
- for (unsigned X : DbgMergedVRegNums[Reg])
+ for (Register X : DbgMergedVRegNums[Reg])
Func(X);
};
@@ -3547,7 +3623,7 @@ void RegisterCoalescer::checkMergingChangesDbgValues(CoalescerPair &CP,
PerformScan(CP.getDstReg(), ScanForDstReg);
}
-void RegisterCoalescer::checkMergingChangesDbgValuesImpl(unsigned Reg,
+void RegisterCoalescer::checkMergingChangesDbgValuesImpl(Register Reg,
LiveRange &OtherLR,
LiveRange &RegLR,
JoinVals &RegVals) {
@@ -3673,7 +3749,7 @@ static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
}
void RegisterCoalescer::lateLiveIntervalUpdate() {
- for (unsigned reg : ToBeUpdated) {
+ for (Register reg : ToBeUpdated) {
if (!LIS->hasInterval(reg))
continue;
LiveInterval &LI = LIS->getInterval(reg);
@@ -3707,7 +3783,7 @@ copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
/// Check if DstReg is a terminal node.
/// I.e., it does not have any affinity other than \p Copy.
-static bool isTerminalReg(unsigned DstReg, const MachineInstr &Copy,
+static bool isTerminalReg(Register DstReg, const MachineInstr &Copy,
const MachineRegisterInfo *MRI) {
assert(Copy.isCopyLike());
 // Check if the destination of this copy has any other affinity.
@@ -3721,15 +3797,16 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
assert(Copy.isCopyLike());
if (!UseTerminalRule)
return false;
- unsigned DstReg, DstSubReg, SrcReg, SrcSubReg;
+ Register SrcReg, DstReg;
+ unsigned SrcSubReg = 0, DstSubReg = 0;
if (!isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg))
return false;
// Check if the destination of this copy has any other affinity.
- if (Register::isPhysicalRegister(DstReg) ||
+ if (DstReg.isPhysical() ||
// If SrcReg is a physical register, the copy won't be coalesced.
// Ignoring it may have other side effect (like missing
// rematerialization). So keep it.
- Register::isPhysicalRegister(SrcReg) || !isTerminalReg(DstReg, Copy, MRI))
+ SrcReg.isPhysical() || !isTerminalReg(DstReg, Copy, MRI))
return false;
// DstReg is a terminal node. Check if it interferes with any other
@@ -3745,7 +3822,8 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
// For now, just consider the copies that are in the same block.
if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB)
continue;
- unsigned OtherReg, OtherSubReg, OtherSrcReg, OtherSrcSubReg;
+ Register OtherSrcReg, OtherReg;
+ unsigned OtherSrcSubReg = 0, OtherSubReg = 0;
if (!isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg,
OtherSubReg))
return false;
@@ -3930,7 +4008,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
LLVM_DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size()
<< " regs.\n");
for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) {
- unsigned Reg = InflateRegs[i];
+ Register Reg = InflateRegs[i];
if (MRI->reg_nodbg_empty(Reg))
continue;
if (MRI->recomputeRegClass(Reg)) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.h b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.h
index f505d46cd338..f265d93fb0d6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.h
@@ -14,6 +14,8 @@
#ifndef LLVM_LIB_CODEGEN_REGISTERCOALESCER_H
#define LLVM_LIB_CODEGEN_REGISTERCOALESCER_H
+#include "llvm/CodeGen/Register.h"
+
namespace llvm {
class MachineInstr;
@@ -28,10 +30,10 @@ class TargetRegisterInfo;
/// The register that will be left after coalescing. It can be a
/// virtual or physical register.
- unsigned DstReg = 0;
+ Register DstReg;
/// The virtual register that will be coalesced into dstReg.
- unsigned SrcReg = 0;
+ Register SrcReg;
/// The sub-register index of the old DstReg in the new coalesced register.
unsigned DstIdx = 0;
@@ -59,9 +61,9 @@ class TargetRegisterInfo;
/// Create a CoalescerPair representing a virtreg-to-physreg copy.
/// No need to call setRegisters().
- CoalescerPair(unsigned VirtReg, unsigned PhysReg,
+ CoalescerPair(Register VirtReg, MCRegister PhysReg,
const TargetRegisterInfo &tri)
- : TRI(tri), DstReg(PhysReg), SrcReg(VirtReg) {}
+ : TRI(tri), DstReg(PhysReg), SrcReg(VirtReg) {}
/// Set registers to match the copy instruction MI. Return
/// false if MI is not a coalescable copy instruction.
@@ -92,10 +94,10 @@ class TargetRegisterInfo;
/// Return the register (virtual or physical) that will remain
/// after coalescing.
- unsigned getDstReg() const { return DstReg; }
+ Register getDstReg() const { return DstReg; }
/// Return the virtual register that will be coalesced away.
- unsigned getSrcReg() const { return SrcReg; }
+ Register getSrcReg() const { return SrcReg; }
/// Return the subregister index that DstReg will be coalesced into, or 0.
unsigned getDstIdx() const { return DstIdx; }
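Most of this import is the unsigned-to-Register migration visible throughout these hunks. A hedged sketch of the distinctions the new types encode (Register from llvm/CodeGen/Register.h; MCRegister comes along via llvm/MC/MCRegister.h):

    #include "llvm/CodeGen/Register.h"

    void classify(llvm::Register R) {
      if (R.isVirtual()) {
        // Virtual registers exist only before register allocation.
      } else if (R.isPhysical()) {
        // Checked narrowing for MC-layer APIs, as in the asMCReg() calls above.
        llvm::MCRegister Phys = R.asMCReg();
        (void)Phys;
      }
      // Otherwise R is the null register.
    }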
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp
index ecbc4ed63ef6..8f1fc103e869 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -62,7 +62,7 @@ static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure,
/// Decrease pressure for each pressure set provided by TargetRegisterInfo.
static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure,
- const MachineRegisterInfo &MRI, unsigned Reg,
+ const MachineRegisterInfo &MRI, Register Reg,
LaneBitmask PrevMask, LaneBitmask NewMask) {
//assert((NewMask & !PrevMask) == 0 && "Must not add bits");
if (NewMask.any() || PrevMask.none())
@@ -152,7 +152,7 @@ void RegPressureDelta::dump() const {
#endif
-void RegPressureTracker::increaseRegPressure(unsigned RegUnit,
+void RegPressureTracker::increaseRegPressure(Register RegUnit,
LaneBitmask PreviousMask,
LaneBitmask NewMask) {
if (PreviousMask.any() || NewMask.none())
@@ -167,7 +167,7 @@ void RegPressureTracker::increaseRegPressure(unsigned RegUnit,
}
}
-void RegPressureTracker::decreaseRegPressure(unsigned RegUnit,
+void RegPressureTracker::decreaseRegPressure(Register RegUnit,
LaneBitmask PreviousMask,
LaneBitmask NewMask) {
decreaseSetPressure(CurrSetPressure, *MRI, RegUnit, PreviousMask, NewMask);
@@ -360,7 +360,7 @@ void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) {
LiveThruPressure.assign(TRI->getNumRegPressureSets(), 0);
 assert(isBottomClosed() && "need bottom-up tracking to initialize.");
for (const RegisterMaskPair &Pair : P.LiveOutRegs) {
- unsigned RegUnit = Pair.RegUnit;
+ Register RegUnit = Pair.RegUnit;
if (Register::isVirtualRegister(RegUnit)
&& !RPTracker.hasUntiedDef(RegUnit))
increaseSetPressure(LiveThruPressure, *MRI, RegUnit,
@@ -369,7 +369,7 @@ void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) {
}
static LaneBitmask getRegLanes(ArrayRef<RegisterMaskPair> RegUnits,
- unsigned RegUnit) {
+ Register RegUnit) {
auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
return Other.RegUnit == RegUnit;
});
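Only getRegLanes' signature changes here; for context, a hedged reconstruction of the complete helper under the assumption that a miss maps to the empty mask (RegisterMaskPair is from llvm/CodeGen/RegisterPressure.h, llvm::find_if from llvm/ADT/STLExtras.h; lookupRegLanes is a hypothetical name):

    static llvm::LaneBitmask
    lookupRegLanes(llvm::ArrayRef<llvm::RegisterMaskPair> RegUnits,
                   llvm::Register RegUnit) {
      auto I = llvm::find_if(RegUnits, [RegUnit](const llvm::RegisterMaskPair Other) {
        return Other.RegUnit == RegUnit;
      });
      return I == RegUnits.end() ? llvm::LaneBitmask::getNone() : I->LaneMask;
    }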
@@ -380,7 +380,7 @@ static LaneBitmask getRegLanes(ArrayRef<RegisterMaskPair> RegUnits,
static void addRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits,
RegisterMaskPair Pair) {
- unsigned RegUnit = Pair.RegUnit;
+ Register RegUnit = Pair.RegUnit;
assert(Pair.LaneMask.any());
auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
return Other.RegUnit == RegUnit;
@@ -393,7 +393,7 @@ static void addRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits,
}
static void setRegZero(SmallVectorImpl<RegisterMaskPair> &RegUnits,
- unsigned RegUnit) {
+ Register RegUnit) {
auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
return Other.RegUnit == RegUnit;
});
@@ -406,7 +406,7 @@ static void setRegZero(SmallVectorImpl<RegisterMaskPair> &RegUnits,
static void removeRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits,
RegisterMaskPair Pair) {
- unsigned RegUnit = Pair.RegUnit;
+ Register RegUnit = Pair.RegUnit;
assert(Pair.LaneMask.any());
auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
return Other.RegUnit == RegUnit;
@@ -418,11 +418,12 @@ static void removeRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits,
}
}
-static LaneBitmask getLanesWithProperty(const LiveIntervals &LIS,
- const MachineRegisterInfo &MRI, bool TrackLaneMasks, unsigned RegUnit,
- SlotIndex Pos, LaneBitmask SafeDefault,
- bool(*Property)(const LiveRange &LR, SlotIndex Pos)) {
- if (Register::isVirtualRegister(RegUnit)) {
+static LaneBitmask
+getLanesWithProperty(const LiveIntervals &LIS, const MachineRegisterInfo &MRI,
+ bool TrackLaneMasks, Register RegUnit, SlotIndex Pos,
+ LaneBitmask SafeDefault,
+ bool (*Property)(const LiveRange &LR, SlotIndex Pos)) {
+ if (RegUnit.isVirtual()) {
const LiveInterval &LI = LIS.getInterval(RegUnit);
LaneBitmask Result;
if (TrackLaneMasks && LI.hasSubRanges()) {
@@ -448,7 +449,7 @@ static LaneBitmask getLanesWithProperty(const LiveIntervals &LIS,
static LaneBitmask getLiveLanesAt(const LiveIntervals &LIS,
const MachineRegisterInfo &MRI,
- bool TrackLaneMasks, unsigned RegUnit,
+ bool TrackLaneMasks, Register RegUnit,
SlotIndex Pos) {
return getLanesWithProperty(LIS, MRI, TrackLaneMasks, RegUnit, Pos,
LaneBitmask::getAll(),
@@ -457,7 +458,6 @@ static LaneBitmask getLiveLanesAt(const LiveIntervals &LIS,
});
}
-
namespace {
/// Collect this instruction's unique uses and defs into SmallVectors for
@@ -517,12 +517,13 @@ class RegisterOperandsCollector {
}
}
- void pushReg(unsigned Reg,
+ void pushReg(Register Reg,
SmallVectorImpl<RegisterMaskPair> &RegUnits) const {
- if (Register::isVirtualRegister(Reg)) {
+ if (Reg.isVirtual()) {
addRegLanes(RegUnits, RegisterMaskPair(Reg, LaneBitmask::getAll()));
} else if (MRI.isAllocatable(Reg)) {
- for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
+ for (MCRegUnitIterator Units(Reg.asMCReg(), &TRI); Units.isValid();
+ ++Units)
addRegLanes(RegUnits, RegisterMaskPair(*Units, LaneBitmask::getAll()));
}
}
@@ -549,15 +550,16 @@ class RegisterOperandsCollector {
}
}
- void pushRegLanes(unsigned Reg, unsigned SubRegIdx,
+ void pushRegLanes(Register Reg, unsigned SubRegIdx,
SmallVectorImpl<RegisterMaskPair> &RegUnits) const {
- if (Register::isVirtualRegister(Reg)) {
+ if (Reg.isVirtual()) {
LaneBitmask LaneMask = SubRegIdx != 0
? TRI.getSubRegIndexLaneMask(SubRegIdx)
: MRI.getMaxLaneMaskForVReg(Reg);
addRegLanes(RegUnits, RegisterMaskPair(Reg, LaneMask));
} else if (MRI.isAllocatable(Reg)) {
- for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
+ for (MCRegUnitIterator Units(Reg.asMCReg(), &TRI); Units.isValid();
+ ++Units)
addRegLanes(RegUnits, RegisterMaskPair(*Units, LaneBitmask::getAll()));
}
}
@@ -580,7 +582,7 @@ void RegisterOperands::detectDeadDefs(const MachineInstr &MI,
const LiveIntervals &LIS) {
SlotIndex SlotIdx = LIS.getInstructionIndex(MI);
for (auto RI = Defs.begin(); RI != Defs.end(); /*empty*/) {
- unsigned Reg = RI->RegUnit;
+ Register Reg = RI->RegUnit;
const LiveRange *LR = getLiveRange(LIS, Reg);
if (LR != nullptr) {
LiveQueryResult LRQ = LR->Query(SlotIdx);
@@ -605,7 +607,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS,
Pos.getDeadSlot());
// If the def is all that is live after the instruction, then in case
// of a subregister def we need a read-undef flag.
- unsigned RegUnit = I->RegUnit;
+ Register RegUnit = I->RegUnit;
if (Register::isVirtualRegister(RegUnit) &&
AddFlagsMI != nullptr && (LiveAfter & ~I->LaneMask).none())
AddFlagsMI->setRegisterDefReadUndef(RegUnit);
@@ -631,7 +633,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS,
}
if (AddFlagsMI != nullptr) {
for (const RegisterMaskPair &P : DeadDefs) {
- unsigned RegUnit = P.RegUnit;
+ Register RegUnit = P.RegUnit;
if (!Register::isVirtualRegister(RegUnit))
continue;
LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, true, RegUnit,
@@ -667,7 +669,7 @@ void PressureDiffs::addInstruction(unsigned Idx,
}
/// Add a change in pressure to the pressure diff of a given instruction.
-void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec,
+void PressureDiff::addPressureChange(Register RegUnit, bool IsDec,
const MachineRegisterInfo *MRI) {
PSetIterator PSetI = MRI->getPressureSets(RegUnit);
int Weight = IsDec ? -PSetI.getWeight() : PSetI.getWeight();
@@ -714,7 +716,7 @@ void RegPressureTracker::discoverLiveInOrOut(RegisterMaskPair Pair,
SmallVectorImpl<RegisterMaskPair> &LiveInOrOut) {
assert(Pair.LaneMask.any());
- unsigned RegUnit = Pair.RegUnit;
+ Register RegUnit = Pair.RegUnit;
auto I = llvm::find_if(LiveInOrOut, [RegUnit](const RegisterMaskPair &Other) {
return Other.RegUnit == RegUnit;
});
@@ -742,13 +744,13 @@ void RegPressureTracker::discoverLiveOut(RegisterMaskPair Pair) {
void RegPressureTracker::bumpDeadDefs(ArrayRef<RegisterMaskPair> DeadDefs) {
for (const RegisterMaskPair &P : DeadDefs) {
- unsigned Reg = P.RegUnit;
+ Register Reg = P.RegUnit;
LaneBitmask LiveMask = LiveRegs.contains(Reg);
LaneBitmask BumpedMask = LiveMask | P.LaneMask;
increaseRegPressure(Reg, LiveMask, BumpedMask);
}
for (const RegisterMaskPair &P : DeadDefs) {
- unsigned Reg = P.RegUnit;
+ Register Reg = P.RegUnit;
LaneBitmask LiveMask = LiveRegs.contains(Reg);
LaneBitmask BumpedMask = LiveMask | P.LaneMask;
decreaseRegPressure(Reg, BumpedMask, LiveMask);
@@ -770,7 +772,7 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers,
// Kill liveness at live defs.
// TODO: consider earlyclobbers?
for (const RegisterMaskPair &Def : RegOpers.Defs) {
- unsigned Reg = Def.RegUnit;
+ Register Reg = Def.RegUnit;
LaneBitmask PreviousMask = LiveRegs.erase(Def);
LaneBitmask NewMask = PreviousMask & ~Def.LaneMask;
@@ -800,7 +802,7 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers,
// Generate liveness for uses.
for (const RegisterMaskPair &Use : RegOpers.Uses) {
- unsigned Reg = Use.RegUnit;
+ Register Reg = Use.RegUnit;
assert(Use.LaneMask.any());
LaneBitmask PreviousMask = LiveRegs.insert(Use);
LaneBitmask NewMask = PreviousMask | Use.LaneMask;
@@ -840,7 +842,7 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers,
}
if (TrackUntiedDefs) {
for (const RegisterMaskPair &Def : RegOpers.Defs) {
- unsigned RegUnit = Def.RegUnit;
+ Register RegUnit = Def.RegUnit;
if (Register::isVirtualRegister(RegUnit) &&
(LiveRegs.contains(RegUnit) & Def.LaneMask).none())
UntiedDefs.insert(RegUnit);
@@ -911,7 +913,7 @@ void RegPressureTracker::advance(const RegisterOperands &RegOpers) {
}
for (const RegisterMaskPair &Use : RegOpers.Uses) {
- unsigned Reg = Use.RegUnit;
+ Register Reg = Use.RegUnit;
LaneBitmask LiveMask = LiveRegs.contains(Reg);
LaneBitmask LiveIn = Use.LaneMask & ~LiveMask;
if (LiveIn.any()) {
@@ -1060,7 +1062,7 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
// Kill liveness at live defs.
for (const RegisterMaskPair &P : RegOpers.Defs) {
- unsigned Reg = P.RegUnit;
+ Register Reg = P.RegUnit;
LaneBitmask LiveLanes = LiveRegs.contains(Reg);
LaneBitmask UseLanes = getRegLanes(RegOpers.Uses, Reg);
LaneBitmask DefLanes = P.LaneMask;
@@ -1069,7 +1071,7 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
}
// Generate liveness for uses.
for (const RegisterMaskPair &P : RegOpers.Uses) {
- unsigned Reg = P.RegUnit;
+ Register Reg = P.RegUnit;
LaneBitmask LiveLanes = LiveRegs.contains(Reg);
LaneBitmask LiveAfter = LiveLanes | P.LaneMask;
increaseRegPressure(Reg, LiveLanes, LiveAfter);
@@ -1240,7 +1242,7 @@ static LaneBitmask findUseBetween(unsigned Reg, LaneBitmask LastUseMask,
return LastUseMask;
}
-LaneBitmask RegPressureTracker::getLiveLanesAt(unsigned RegUnit,
+LaneBitmask RegPressureTracker::getLiveLanesAt(Register RegUnit,
SlotIndex Pos) const {
assert(RequireIntervals);
return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, Pos,
@@ -1250,7 +1252,7 @@ LaneBitmask RegPressureTracker::getLiveLanesAt(unsigned RegUnit,
});
}
-LaneBitmask RegPressureTracker::getLastUsedLanes(unsigned RegUnit,
+LaneBitmask RegPressureTracker::getLastUsedLanes(Register RegUnit,
SlotIndex Pos) const {
assert(RequireIntervals);
return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit,
@@ -1261,7 +1263,7 @@ LaneBitmask RegPressureTracker::getLastUsedLanes(unsigned RegUnit,
});
}
-LaneBitmask RegPressureTracker::getLiveThroughAt(unsigned RegUnit,
+LaneBitmask RegPressureTracker::getLiveThroughAt(Register RegUnit,
SlotIndex Pos) const {
assert(RequireIntervals);
return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, Pos,
@@ -1294,7 +1296,7 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
if (RequireIntervals) {
for (const RegisterMaskPair &Use : RegOpers.Uses) {
- unsigned Reg = Use.RegUnit;
+ Register Reg = Use.RegUnit;
LaneBitmask LastUseMask = getLastUsedLanes(Reg, SlotIdx);
if (LastUseMask.none())
continue;
@@ -1317,7 +1319,7 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
// Generate liveness for defs.
for (const RegisterMaskPair &Def : RegOpers.Defs) {
- unsigned Reg = Def.RegUnit;
+ Register Reg = Def.RegUnit;
LaneBitmask LiveMask = LiveRegs.contains(Reg);
LaneBitmask NewMask = LiveMask | Def.LaneMask;
increaseRegPressure(Reg, LiveMask, NewMask);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp
index 41b6de1441d7..a833895c115d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -91,18 +91,18 @@ void RegScavenger::enterBasicBlockEnd(MachineBasicBlock &MBB) {
LiveUnits.addLiveOuts(MBB);
 // Move the internal iterator to the last instruction of the block.
- if (MBB.begin() != MBB.end()) {
+ if (!MBB.empty()) {
MBBI = std::prev(MBB.end());
Tracking = true;
}
}
-void RegScavenger::addRegUnits(BitVector &BV, Register Reg) {
+void RegScavenger::addRegUnits(BitVector &BV, MCRegister Reg) {
for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
BV.set(*RUI);
}
-void RegScavenger::removeRegUnits(BitVector &BV, Register Reg) {
+void RegScavenger::removeRegUnits(BitVector &BV, MCRegister Reg) {
for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
BV.reset(*RUI);
}
@@ -134,9 +134,9 @@ void RegScavenger::determineKillsAndDefs() {
}
if (!MO.isReg())
continue;
- Register Reg = MO.getReg();
- if (!Register::isPhysicalRegister(Reg) || isReserved(Reg))
+ if (!MO.getReg().isPhysical() || isReserved(MO.getReg()))
continue;
+ MCRegister Reg = MO.getReg().asMCReg();
if (MO.isUse()) {
// Ignore undef uses.
@@ -154,25 +154,6 @@ void RegScavenger::determineKillsAndDefs() {
}
}
-void RegScavenger::unprocess() {
- assert(Tracking && "Cannot unprocess because we're not tracking");
-
- MachineInstr &MI = *MBBI;
- if (!MI.isDebugInstr()) {
- determineKillsAndDefs();
-
- // Commit the changes.
- setUnused(DefRegUnits);
- setUsed(KillRegUnits);
- }
-
- if (MBBI == MBB->begin()) {
- MBBI = MachineBasicBlock::iterator(nullptr);
- Tracking = false;
- } else
- --MBBI;
-}
-
void RegScavenger::forward() {
// Move ptr forward.
if (!Tracking) {
@@ -592,9 +573,8 @@ Register RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
RestoreAfter);
MCPhysReg Reg = P.first;
MachineBasicBlock::iterator SpillBefore = P.second;
- assert(Reg != 0 && "No register left to scavenge!");
// Found an available register?
- if (SpillBefore == MBB.end()) {
+ if (Reg != 0 && SpillBefore == MBB.end()) {
LLVM_DEBUG(dbgs() << "Scavenged free register: " << printReg(Reg, TRI)
<< '\n');
return Reg;
@@ -603,6 +583,8 @@ Register RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
if (!AllowSpill)
return 0;
+ assert(Reg != 0 && "No register left to scavenge!");
+
MachineBasicBlock::iterator ReloadAfter =
RestoreAfter ? std::next(MBBI) : MBBI;
MachineBasicBlock::iterator ReloadBefore = std::next(ReloadAfter);
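Moving the assertion below the AllowSpill exit changes when a failed scavenge is tolerated. A hedged pseudocode summary of the resulting control flow:

    // if (Reg != 0 && SpillBefore == MBB.end()) return Reg; // free reg found
    // if (!AllowSpill)                          return 0;   // caller handles it
    // assert(Reg != 0);  // only the spill path still requires a candidate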
@@ -652,11 +634,10 @@ static Register scavengeVReg(MachineRegisterInfo &MRI, RegScavenger &RS,
// we get a single contiguous lifetime.
//
// Definitions in MRI.def_begin() are unordered, search for the first.
- MachineRegisterInfo::def_iterator FirstDef =
- std::find_if(MRI.def_begin(VReg), MRI.def_end(),
- [VReg, &TRI](const MachineOperand &MO) {
- return !MO.getParent()->readsRegister(VReg, &TRI);
- });
+ MachineRegisterInfo::def_iterator FirstDef = llvm::find_if(
+ MRI.def_operands(VReg), [VReg, &TRI](const MachineOperand &MO) {
+ return !MO.getParent()->readsRegister(VReg, &TRI);
+ });
assert(FirstDef != MRI.def_end() &&
"Must have one definition that does not redefine vreg");
MachineInstr &DefMI = *FirstDef->getParent();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
index 4ee28d6bbb46..0872ec303460 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -130,7 +130,7 @@ bool RenameIndependentSubregs::renameComponents(LiveInterval &LI) const {
return false;
// Create a new VReg for each class.
- unsigned Reg = LI.reg;
+ unsigned Reg = LI.reg();
const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
SmallVector<LiveInterval*, 4> Intervals;
Intervals.push_back(&LI);
@@ -175,7 +175,7 @@ bool RenameIndependentSubregs::findComponents(IntEqClasses &Classes,
// across subranges when they are affected by the same MachineOperand.
const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
Classes.grow(NumComponents);
- unsigned Reg = LI.reg;
+ unsigned Reg = LI.reg();
for (const MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
if (!MO.isDef() && !MO.readsReg())
continue;
@@ -212,7 +212,7 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes,
const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
const SmallVectorImpl<LiveInterval*> &Intervals) const {
const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
- unsigned Reg = Intervals[0]->reg;
+ unsigned Reg = Intervals[0]->reg();
for (MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(Reg),
E = MRI->reg_nodbg_end(); I != E; ) {
MachineOperand &MO = *I++;
@@ -242,7 +242,7 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes,
break;
}
- unsigned VReg = Intervals[ID]->reg;
+ unsigned VReg = Intervals[ID]->reg();
MO.setReg(VReg);
if (MO.isTied() && Reg != VReg) {
@@ -304,7 +304,7 @@ void RenameIndependentSubregs::computeMainRangesFixFlags(
const SlotIndexes &Indexes = *LIS->getSlotIndexes();
for (size_t I = 0, E = Intervals.size(); I < E; ++I) {
LiveInterval &LI = *Intervals[I];
- unsigned Reg = LI.reg;
+ unsigned Reg = LI.reg();
LI.removeEmptySubRanges();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp
index 55478c232dd7..31797631c97b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp
@@ -151,7 +151,7 @@ class SafeStack {
Value *getStackGuard(IRBuilder<> &IRB, Function &F);
/// Load stack guard from the frame and check if it has changed.
- void checkStackGuard(IRBuilder<> &IRB, Function &F, ReturnInst &RI,
+ void checkStackGuard(IRBuilder<> &IRB, Function &F, Instruction &RI,
AllocaInst *StackGuardSlot, Value *StackGuard);
/// Find all static allocas, dynamic allocas, return instructions and
@@ -160,7 +160,7 @@ class SafeStack {
void findInsts(Function &F, SmallVectorImpl<AllocaInst *> &StaticAllocas,
SmallVectorImpl<AllocaInst *> &DynamicAllocas,
SmallVectorImpl<Argument *> &ByValArguments,
- SmallVectorImpl<ReturnInst *> &Returns,
+ SmallVectorImpl<Instruction *> &Returns,
SmallVectorImpl<Instruction *> &StackRestorePoints);
/// Calculate the allocation size of a given alloca. Returns 0 if the
@@ -168,15 +168,13 @@ class SafeStack {
uint64_t getStaticAllocaAllocationSize(const AllocaInst* AI);
/// Allocate space for all static allocas in \p StaticAllocas,
- /// replace allocas with pointers into the unsafe stack and generate code to
- /// restore the stack pointer before all return instructions in \p Returns.
+ /// replace allocas with pointers into the unsafe stack.
///
/// \returns A pointer to the top of the unsafe stack after all unsafe static
/// allocas are allocated.
Value *moveStaticAllocasToUnsafeStack(IRBuilder<> &IRB, Function &F,
ArrayRef<AllocaInst *> StaticAllocas,
ArrayRef<Argument *> ByValArguments,
- ArrayRef<ReturnInst *> Returns,
Instruction *BasePointer,
AllocaInst *StackGuardSlot);
@@ -383,7 +381,7 @@ void SafeStack::findInsts(Function &F,
SmallVectorImpl<AllocaInst *> &StaticAllocas,
SmallVectorImpl<AllocaInst *> &DynamicAllocas,
SmallVectorImpl<Argument *> &ByValArguments,
- SmallVectorImpl<ReturnInst *> &Returns,
+ SmallVectorImpl<Instruction *> &Returns,
SmallVectorImpl<Instruction *> &StackRestorePoints) {
for (Instruction &I : instructions(&F)) {
if (auto AI = dyn_cast<AllocaInst>(&I)) {
@@ -401,7 +399,10 @@ void SafeStack::findInsts(Function &F,
DynamicAllocas.push_back(AI);
}
} else if (auto RI = dyn_cast<ReturnInst>(&I)) {
- Returns.push_back(RI);
+ if (CallInst *CI = I.getParent()->getTerminatingMustTailCall())
+ Returns.push_back(CI);
+ else
+ Returns.push_back(RI);
} else if (auto CI = dyn_cast<CallInst>(&I)) {
// setjmps require stack restore.
if (CI->getCalledFunction() && CI->canReturnTwice())
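The Returns change accounts for musttail calls: the unsafe-stack epilogue must run before the call, so the call, not the ReturnInst after it, becomes the restore point. A hedged sketch of the block-level form of the same query, where BB and Returns stand in for the loop's locals:

    if (llvm::CallInst *CI = BB.getTerminatingMustTailCall())
      Returns.push_back(CI); // restore the unsafe stack before the tail call
    else if (auto *RI = llvm::dyn_cast<llvm::ReturnInst>(BB.getTerminator()))
      Returns.push_back(RI);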
@@ -465,7 +466,7 @@ SafeStack::createStackRestorePoints(IRBuilder<> &IRB, Function &F,
return DynamicTop;
}
-void SafeStack::checkStackGuard(IRBuilder<> &IRB, Function &F, ReturnInst &RI,
+void SafeStack::checkStackGuard(IRBuilder<> &IRB, Function &F, Instruction &RI,
AllocaInst *StackGuardSlot, Value *StackGuard) {
Value *V = IRB.CreateLoad(StackPtrTy, StackGuardSlot);
Value *Cmp = IRB.CreateICmpNE(StackGuard, V);
@@ -490,8 +491,8 @@ void SafeStack::checkStackGuard(IRBuilder<> &IRB, Function &F, ReturnInst &RI,
/// prologue into a local variable and restore it in the epilogue.
Value *SafeStack::moveStaticAllocasToUnsafeStack(
IRBuilder<> &IRB, Function &F, ArrayRef<AllocaInst *> StaticAllocas,
- ArrayRef<Argument *> ByValArguments, ArrayRef<ReturnInst *> Returns,
- Instruction *BasePointer, AllocaInst *StackGuardSlot) {
+ ArrayRef<Argument *> ByValArguments, Instruction *BasePointer,
+ AllocaInst *StackGuardSlot) {
if (StaticAllocas.empty() && ByValArguments.empty())
return BasePointer;
@@ -759,7 +760,7 @@ bool SafeStack::run() {
SmallVector<AllocaInst *, 16> StaticAllocas;
SmallVector<AllocaInst *, 4> DynamicAllocas;
SmallVector<Argument *, 4> ByValArguments;
- SmallVector<ReturnInst *, 4> Returns;
+ SmallVector<Instruction *, 4> Returns;
// Collect all points where stack gets unwound and needs to be restored
// This is only necessary because the runtime (setjmp and unwind code) is
@@ -788,7 +789,8 @@ bool SafeStack::run() {
 // Calls must always have a debug location, or else inlining breaks. So
 // we explicitly set an artificial debug location here.
if (DISubprogram *SP = F.getSubprogram())
- IRB.SetCurrentDebugLocation(DebugLoc::get(SP->getScopeLine(), 0, SP));
+ IRB.SetCurrentDebugLocation(
+ DILocation::get(SP->getContext(), SP->getScopeLine(), 0, SP));
if (SafeStackUsePointerAddress) {
FunctionCallee Fn = F.getParent()->getOrInsertFunction(
"__safestack_pointer_address", StackPtrTy->getPointerTo(0));
@@ -812,7 +814,7 @@ bool SafeStack::run() {
StackGuardSlot = IRB.CreateAlloca(StackPtrTy, nullptr);
IRB.CreateStore(StackGuard, StackGuardSlot);
- for (ReturnInst *RI : Returns) {
+ for (Instruction *RI : Returns) {
IRBuilder<> IRBRet(RI);
checkStackGuard(IRBRet, F, *RI, StackGuardSlot, StackGuard);
}
@@ -820,9 +822,8 @@ bool SafeStack::run() {
// The top of the unsafe stack after all unsafe static allocas are
// allocated.
- Value *StaticTop =
- moveStaticAllocasToUnsafeStack(IRB, F, StaticAllocas, ByValArguments,
- Returns, BasePointer, StackGuardSlot);
+ Value *StaticTop = moveStaticAllocasToUnsafeStack(
+ IRB, F, StaticAllocas, ByValArguments, BasePointer, StackGuardSlot);
// Safe stack object that stores the current unsafe stack top. It is updated
// as unsafe dynamic (non-constant-sized) allocas are allocated and freed.
@@ -838,7 +839,7 @@ bool SafeStack::run() {
DynamicAllocas);
// Restore the unsafe stack pointer before each return.
- for (ReturnInst *RI : Returns) {
+ for (Instruction *RI : Returns) {
IRB.SetInsertPoint(RI);
IRB.CreateStore(BasePointer, UnsafeStackPtr);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp
index c823454f825c..5d61b3a146b4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "SafeStackLayout.h"
-#include "llvm/Analysis/StackLifetime.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
@@ -141,10 +140,10 @@ void StackLayout::computeLayout() {
// Sort objects by size (largest first) to reduce fragmentation.
if (StackObjects.size() > 2)
- std::stable_sort(StackObjects.begin() + 1, StackObjects.end(),
- [](const StackObject &a, const StackObject &b) {
- return a.Size > b.Size;
- });
+ llvm::stable_sort(drop_begin(StackObjects),
+ [](const StackObject &a, const StackObject &b) {
+ return a.Size > b.Size;
+ });
for (auto &Obj : StackObjects)
layoutObject(Obj);
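llvm::drop_begin (llvm/ADT/STLExtras.h) adapts a range to skip its first N elements, with N defaulting to 1, which replaces the explicit begin() + 1 arithmetic above. A hedged, self-contained example:

    #include "llvm/ADT/STLExtras.h"
    #include <vector>

    void demo() {
      std::vector<int> V = {9, 3, 7, 5};
      // Sort everything except V[0], largest first.
      llvm::stable_sort(llvm::drop_begin(V),
                        [](int a, int b) { return a > b; });
      // V is now {9, 7, 5, 3}; the first element kept its place.
    }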
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
deleted file mode 100644
index c93b29617438..000000000000
--- a/contrib/llvm-project/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
+++ /dev/null
@@ -1,911 +0,0 @@
-//===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===//
-// intrinsics
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass replaces masked memory intrinsics - when unsupported by the target
-// - with a chain of basic blocks that deal with the elements one by one if the
-// appropriate mask bit is set.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/Twine.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Constant.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Casting.h"
-#include <algorithm>
-#include <cassert>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "scalarize-masked-mem-intrin"
-
-namespace {
-
-class ScalarizeMaskedMemIntrin : public FunctionPass {
- const TargetTransformInfo *TTI = nullptr;
- const DataLayout *DL = nullptr;
-
-public:
- static char ID; // Pass identification, replacement for typeid
-
- explicit ScalarizeMaskedMemIntrin() : FunctionPass(ID) {
- initializeScalarizeMaskedMemIntrinPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-
- StringRef getPassName() const override {
- return "Scalarize Masked Memory Intrinsics";
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetTransformInfoWrapperPass>();
- }
-
-private:
- bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
- bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
-};
-
-} // end anonymous namespace
-
-char ScalarizeMaskedMemIntrin::ID = 0;
-
-INITIALIZE_PASS(ScalarizeMaskedMemIntrin, DEBUG_TYPE,
- "Scalarize unsupported masked memory intrinsics", false, false)
-
-FunctionPass *llvm::createScalarizeMaskedMemIntrinPass() {
- return new ScalarizeMaskedMemIntrin();
-}
-
-static bool isConstantIntVector(Value *Mask) {
- Constant *C = dyn_cast<Constant>(Mask);
- if (!C)
- return false;
-
- unsigned NumElts = cast<FixedVectorType>(Mask->getType())->getNumElements();
- for (unsigned i = 0; i != NumElts; ++i) {
- Constant *CElt = C->getAggregateElement(i);
- if (!CElt || !isa<ConstantInt>(CElt))
- return false;
- }
-
- return true;
-}
-
-// Translate a masked load intrinsic like
-// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
-// <16 x i1> %mask, <16 x i32> %passthru)
-// to a chain of basic blocks, loading the elements one by one if
-// the appropriate mask bit is set
-//
-// %1 = bitcast i8* %addr to i32*
-// %2 = extractelement <16 x i1> %mask, i32 0
-// br i1 %2, label %cond.load, label %else
-//
-// cond.load: ; preds = %0
-// %3 = getelementptr i32* %1, i32 0
-// %4 = load i32* %3
-// %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0
-// br label %else
-//
-// else: ; preds = %0, %cond.load
-// %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ undef, %0 ]
-// %6 = extractelement <16 x i1> %mask, i32 1
-// br i1 %6, label %cond.load1, label %else2
-//
-// cond.load1: ; preds = %else
-// %7 = getelementptr i32* %1, i32 1
-// %8 = load i32* %7
-// %9 = insertelement <16 x i32> %res.phi.else, i32 %8, i32 1
-// br label %else2
-//
-// else2: ; preds = %else, %cond.load1
-// %res.phi.else3 = phi <16 x i32> [ %9, %cond.load1 ], [ %res.phi.else, %else ]
-// %10 = extractelement <16 x i1> %mask, i32 2
-// br i1 %10, label %cond.load4, label %else5
-//
-static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) {
- Value *Ptr = CI->getArgOperand(0);
- Value *Alignment = CI->getArgOperand(1);
- Value *Mask = CI->getArgOperand(2);
- Value *Src0 = CI->getArgOperand(3);
-
- const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
- VectorType *VecType = cast<FixedVectorType>(CI->getType());
-
- Type *EltTy = VecType->getElementType();
-
- IRBuilder<> Builder(CI->getContext());
- Instruction *InsertPt = CI;
- BasicBlock *IfBlock = CI->getParent();
-
- Builder.SetInsertPoint(InsertPt);
- Builder.SetCurrentDebugLocation(CI->getDebugLoc());
-
- // Short-cut if the mask is all-true.
- if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
- Value *NewI = Builder.CreateAlignedLoad(VecType, Ptr, AlignVal);
- CI->replaceAllUsesWith(NewI);
- CI->eraseFromParent();
- return;
- }
-
- // Adjust alignment for the scalar instruction.
- const Align AdjustedAlignVal =
- commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
- // Bitcast %addr from i8* to EltTy*
- Type *NewPtrType =
- EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
- Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
- unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();
-
- // The result vector
- Value *VResult = Src0;
-
- if (isConstantIntVector(Mask)) {
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
- continue;
- Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
- LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
- VResult = Builder.CreateInsertElement(VResult, Load, Idx);
- }
- CI->replaceAllUsesWith(VResult);
- CI->eraseFromParent();
- return;
- }
-
- // If the mask is not v1i1, use scalar bit test operations. This generates
- // better results on X86 at least.
- Value *SclrMask;
- if (VectorWidth != 1) {
- Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
- SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
- }
-
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- // Fill the "else" block, created in the previous iteration
- //
- // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
- // %mask_1 = and i16 %scalar_mask, i32 1 << Idx
- // %cond = icmp ne i16 %mask_1, 0
- // br i1 %mask_1, label %cond.load, label %else
- //
- Value *Predicate;
- if (VectorWidth != 1) {
- Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
- Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
- Builder.getIntN(VectorWidth, 0));
- } else {
- Predicate = Builder.CreateExtractElement(Mask, Idx);
- }
-
- // Create "cond" block
- //
- // %EltAddr = getelementptr i32* %1, i32 0
- // %Elt = load i32* %EltAddr
- // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
- //
- BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(),
- "cond.load");
- Builder.SetInsertPoint(InsertPt);
-
- Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
- LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
- Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
-
- // Create "else" block, fill it in the next iteration
- BasicBlock *NewIfBlock =
- CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
- Builder.SetInsertPoint(InsertPt);
- Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
- OldBr->eraseFromParent();
- BasicBlock *PrevIfBlock = IfBlock;
- IfBlock = NewIfBlock;
-
- // Create the phi to join the new and previous value.
- PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
- Phi->addIncoming(NewVResult, CondBlock);
- Phi->addIncoming(VResult, PrevIfBlock);
- VResult = Phi;
- }
-
- CI->replaceAllUsesWith(VResult);
- CI->eraseFromParent();
-
- ModifiedDT = true;
-}
-
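Before the deleted pass's store, gather, and scatter variants repeat it below, the scalar bit-test idiom above deserves one worked example; hedged, with Builder and SclrMask as in the loop, and concrete values VectorWidth = 4, Idx = 2:

    llvm::Value *Bit = Builder.getInt(llvm::APInt::getOneBitSet(4, 2)); // i4 4
    llvm::Value *Pred = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Bit),
                                             Builder.getIntN(4, 0));
    // Emitted IR:  %m = and i4 %scalar_mask, 4
    //              %p = icmp ne i4 %m, 0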
-// Translate a masked store intrinsic, like
-// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
-// <16 x i1> %mask)
-// to a chain of basic blocks that store the elements one by one if
-// the appropriate mask bit is set
-//
-// %1 = bitcast i8* %addr to i32*
-// %2 = extractelement <16 x i1> %mask, i32 0
-// br i1 %2, label %cond.store, label %else
-//
-// cond.store: ; preds = %0
-// %3 = extractelement <16 x i32> %val, i32 0
-// %4 = getelementptr i32* %1, i32 0
-// store i32 %3, i32* %4
-// br label %else
-//
-// else: ; preds = %0, %cond.store
-// %5 = extractelement <16 x i1> %mask, i32 1
-// br i1 %5, label %cond.store1, label %else2
-//
-// cond.store1: ; preds = %else
-// %6 = extractelement <16 x i32> %val, i32 1
-// %7 = getelementptr i32* %1, i32 1
-// store i32 %6, i32* %7
-// br label %else2
-// . . .
-static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) {
- Value *Src = CI->getArgOperand(0);
- Value *Ptr = CI->getArgOperand(1);
- Value *Alignment = CI->getArgOperand(2);
- Value *Mask = CI->getArgOperand(3);
-
- const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
- auto *VecType = cast<VectorType>(Src->getType());
-
- Type *EltTy = VecType->getElementType();
-
- IRBuilder<> Builder(CI->getContext());
- Instruction *InsertPt = CI;
- BasicBlock *IfBlock = CI->getParent();
- Builder.SetInsertPoint(InsertPt);
- Builder.SetCurrentDebugLocation(CI->getDebugLoc());
-
- // Short-cut if the mask is all-true.
- if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
- Builder.CreateAlignedStore(Src, Ptr, AlignVal);
- CI->eraseFromParent();
- return;
- }
-
- // Adjust alignment for the scalar instruction.
- const Align AdjustedAlignVal =
- commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
- // Bitcast %addr from i8* to EltTy*
- Type *NewPtrType =
- EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
- Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
- unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();
-
- if (isConstantIntVector(Mask)) {
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
- continue;
- Value *OneElt = Builder.CreateExtractElement(Src, Idx);
- Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
- Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
- }
- CI->eraseFromParent();
- return;
- }
-
- // If the mask is not v1i1, use scalar bit test operations. This generates
- // better results on X86 at least.
- Value *SclrMask;
- if (VectorWidth != 1) {
- Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
- SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
- }
-
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- // Fill the "else" block, created in the previous iteration
- //
- // %mask_1 = and i16 %scalar_mask, i32 1 << Idx
- // %cond = icmp ne i16 %mask_1, 0
- // br i1 %mask_1, label %cond.store, label %else
- //
- Value *Predicate;
- if (VectorWidth != 1) {
- Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
- Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
- Builder.getIntN(VectorWidth, 0));
- } else {
- Predicate = Builder.CreateExtractElement(Mask, Idx);
- }
-
- // Create "cond" block
- //
- // %OneElt = extractelement <16 x i32> %Src, i32 Idx
- // %EltAddr = getelementptr i32* %1, i32 0
- // %store i32 %OneElt, i32* %EltAddr
- //
- BasicBlock *CondBlock =
- IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store");
- Builder.SetInsertPoint(InsertPt);
-
- Value *OneElt = Builder.CreateExtractElement(Src, Idx);
- Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
- Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
-
- // Create "else" block, fill it in the next iteration
- BasicBlock *NewIfBlock =
- CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
- Builder.SetInsertPoint(InsertPt);
- Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
- OldBr->eraseFromParent();
- IfBlock = NewIfBlock;
- }
- CI->eraseFromParent();
-
- ModifiedDT = true;
-}
-
-// Translate a masked gather intrinsic like
-// <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4,
-// <16 x i1> %Mask, <16 x i32> %Src)
-// to a chain of basic blocks, loading the elements one by one if
-// the appropriate mask bit is set
-//
-// %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
-// %Mask0 = extractelement <16 x i1> %Mask, i32 0
-// br i1 %Mask0, label %cond.load, label %else
-//
-// cond.load:
-// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
-// %Load0 = load i32, i32* %Ptr0, align 4
-// %Res0 = insertelement <16 x i32> undef, i32 %Load0, i32 0
-// br label %else
-//
-// else:
-// %res.phi.else = phi <16 x i32>[%Res0, %cond.load], [undef, %0]
-// %Mask1 = extractelement <16 x i1> %Mask, i32 1
-// br i1 %Mask1, label %cond.load1, label %else2
-//
-// cond.load1:
-// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
-// %Load1 = load i32, i32* %Ptr1, align 4
-// %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1
-// br label %else2
-// . . .
-// %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
-// ret <16 x i32> %Result
-static void scalarizeMaskedGather(CallInst *CI, bool &ModifiedDT) {
- Value *Ptrs = CI->getArgOperand(0);
- Value *Alignment = CI->getArgOperand(1);
- Value *Mask = CI->getArgOperand(2);
- Value *Src0 = CI->getArgOperand(3);
-
- auto *VecType = cast<FixedVectorType>(CI->getType());
- Type *EltTy = VecType->getElementType();
-
- IRBuilder<> Builder(CI->getContext());
- Instruction *InsertPt = CI;
- BasicBlock *IfBlock = CI->getParent();
- Builder.SetInsertPoint(InsertPt);
- MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
-
- Builder.SetCurrentDebugLocation(CI->getDebugLoc());
-
- // The result vector
- Value *VResult = Src0;
- unsigned VectorWidth = VecType->getNumElements();
-
- // Take the fast path if the mask is a vector of constants.
- if (isConstantIntVector(Mask)) {
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
- continue;
- Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
- LoadInst *Load =
- Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
- VResult =
- Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
- }
- CI->replaceAllUsesWith(VResult);
- CI->eraseFromParent();
- return;
- }
-
- // If the mask is not v1i1, use scalar bit test operations. This generates
- // better results on X86 at least.
- Value *SclrMask;
- if (VectorWidth != 1) {
- Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
- SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
- }
-
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- // Fill the "else" block, created in the previous iteration
- //
- // %Mask1 = and i16 %scalar_mask, i16 (1 << Idx)
- // %cond = icmp ne i16 %Mask1, 0
- // br i1 %cond, label %cond.load, label %else
- //
-
- Value *Predicate;
- if (VectorWidth != 1) {
- Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
- Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
- Builder.getIntN(VectorWidth, 0));
- } else {
- Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
- }
-
- // Create "cond" block
- //
- // %Ptr = extractelement <16 x i32*> %Ptrs, i32 Idx
- // %Elt = load i32, i32* %Ptr
- // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
- //
- BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
- Builder.SetInsertPoint(InsertPt);
-
- Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
- LoadInst *Load =
- Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
- Value *NewVResult =
- Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
-
- // Create "else" block, fill it in the next iteration
- BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
- Builder.SetInsertPoint(InsertPt);
- Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
- OldBr->eraseFromParent();
- BasicBlock *PrevIfBlock = IfBlock;
- IfBlock = NewIfBlock;
-
- PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
- Phi->addIncoming(NewVResult, CondBlock);
- Phi->addIncoming(VResult, PrevIfBlock);
- VResult = Phi;
- }
-
- CI->replaceAllUsesWith(VResult);
- CI->eraseFromParent();
-
- ModifiedDT = true;
-}
-
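The cond.load/else chain built above computes, lane by lane, the following reference semantics; this standalone C++ sketch (template and names invented for illustration, not LLVM API) is the behavior the expansion must preserve:

#include <array>
#include <cstddef>

// Reference semantics of a scalarized masked gather: active lanes load
// through their own pointer; inactive lanes keep the pass-through value.
template <typename T, std::size_t N>
std::array<T, N> maskedGatherRef(const std::array<const T *, N> &Ptrs,
                                 const std::array<bool, N> &Mask,
                                 const std::array<T, N> &PassThru) {
  std::array<T, N> Res = PassThru;
  for (std::size_t Idx = 0; Idx != N; ++Idx)
    if (Mask[Idx])
      Res[Idx] = *Ptrs[Idx];
  return Res;
}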
-// Translate a masked scatter intrinsic, like
-// void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*> %Ptrs, i32 4,
-//                                  <16 x i1> %Mask)
-// to a chain of basic blocks that stores the elements one-by-one if
-// the appropriate mask bit is set.
-//
-// %Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
-// %Mask0 = extractelement <16 x i1> %Mask, i32 0
-// br i1 %Mask0, label %cond.store, label %else
-//
-// cond.store:
-// %Elt0 = extractelement <16 x i32> %Src, i32 0
-// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
-// store i32 %Elt0, i32* %Ptr0, align 4
-// br label %else
-//
-// else:
-// %Mask1 = extractelement <16 x i1> %Mask, i32 1
-// br i1 %Mask1, label %cond.store1, label %else2
-//
-// cond.store1:
-// %Elt1 = extractelement <16 x i32> %Src, i32 1
-// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
-// store i32 %Elt1, i32* %Ptr1, align 4
-// br label %else2
-// . . .
-static void scalarizeMaskedScatter(CallInst *CI, bool &ModifiedDT) {
- Value *Src = CI->getArgOperand(0);
- Value *Ptrs = CI->getArgOperand(1);
- Value *Alignment = CI->getArgOperand(2);
- Value *Mask = CI->getArgOperand(3);
-
- auto *SrcFVTy = cast<FixedVectorType>(Src->getType());
-
- assert(
- isa<VectorType>(Ptrs->getType()) &&
- isa<PointerType>(cast<VectorType>(Ptrs->getType())->getElementType()) &&
- "Vector of pointers is expected in masked scatter intrinsic");
-
- IRBuilder<> Builder(CI->getContext());
- Instruction *InsertPt = CI;
- BasicBlock *IfBlock = CI->getParent();
- Builder.SetInsertPoint(InsertPt);
- Builder.SetCurrentDebugLocation(CI->getDebugLoc());
-
- MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
- unsigned VectorWidth = SrcFVTy->getNumElements();
-
- // Take the fast path if the mask is a vector of constants.
- if (isConstantIntVector(Mask)) {
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
- continue;
- Value *OneElt =
- Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
- Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
- Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
- }
- CI->eraseFromParent();
- return;
- }
-
- // If the mask is not v1i1, use scalar bit test operations. This generates
- // better results on X86 at least.
- Value *SclrMask;
- if (VectorWidth != 1) {
- Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
- SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
- }
-
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- // Fill the "else" block, created in the previous iteration
- //
- // %Mask1 = and i16 %scalar_mask, i16 (1 << Idx)
- // %cond = icmp ne i16 %Mask1, 0
- // br i1 %cond, label %cond.store, label %else
- //
- Value *Predicate;
- if (VectorWidth != 1) {
- Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
- Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
- Builder.getIntN(VectorWidth, 0));
- } else {
- Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
- }
-
- // Create "cond" block
- //
- // %Elt1 = extractelement <16 x i32> %Src, i32 1
- // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
- // store i32 %Elt1, i32* %Ptr1
- //
- BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
- Builder.SetInsertPoint(InsertPt);
-
- Value *OneElt = Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
- Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
- Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
-
- // Create "else" block, fill it in the next iteration
- BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
- Builder.SetInsertPoint(InsertPt);
- Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
- OldBr->eraseFromParent();
- IfBlock = NewIfBlock;
- }
- CI->eraseFromParent();
-
- ModifiedDT = true;
-}
-
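For symmetry with the gather sketch, the scatter expansion above must preserve these reference semantics (again a hedged, self-contained C++ illustration with invented names):

#include <array>
#include <cstddef>

// Reference semantics of a scalarized masked scatter: each active lane
// stores its element through its own pointer; inactive lanes do nothing.
template <typename T, std::size_t N>
void maskedScatterRef(const std::array<T, N> &Src,
                      const std::array<T *, N> &Ptrs,
                      const std::array<bool, N> &Mask) {
  for (std::size_t Idx = 0; Idx != N; ++Idx)
    if (Mask[Idx])
      *Ptrs[Idx] = Src[Idx];
}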
-static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) {
- Value *Ptr = CI->getArgOperand(0);
- Value *Mask = CI->getArgOperand(1);
- Value *PassThru = CI->getArgOperand(2);
-
- auto *VecType = cast<FixedVectorType>(CI->getType());
-
- Type *EltTy = VecType->getElementType();
-
- IRBuilder<> Builder(CI->getContext());
- Instruction *InsertPt = CI;
- BasicBlock *IfBlock = CI->getParent();
-
- Builder.SetInsertPoint(InsertPt);
- Builder.SetCurrentDebugLocation(CI->getDebugLoc());
-
- unsigned VectorWidth = VecType->getNumElements();
-
- // The result vector
- Value *VResult = PassThru;
-
- // Take the fast path if the mask is a vector of constants.
- if (isConstantIntVector(Mask)) {
- unsigned MemIndex = 0;
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
- continue;
- Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
- LoadInst *Load = Builder.CreateAlignedLoad(EltTy, NewPtr, Align(1),
- "Load" + Twine(Idx));
- VResult =
- Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
- ++MemIndex;
- }
- CI->replaceAllUsesWith(VResult);
- CI->eraseFromParent();
- return;
- }
-
- // If the mask is not v1i1, use scalar bit test operations. This generates
- // better results on X86 at least.
- Value *SclrMask;
- if (VectorWidth != 1) {
- Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
- SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
- }
-
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- // Fill the "else" block, created in the previous iteration
- //
- // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
- // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
- // br i1 %mask_1, label %cond.load, label %else
- //
-
- Value *Predicate;
- if (VectorWidth != 1) {
- Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
- Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
- Builder.getIntN(VectorWidth, 0));
- } else {
- Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
- }
-
- // Create "cond" block
- //
- // %Elt = load i32, i32* %Ptr, align 1
- // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
- //
- BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(),
- "cond.load");
- Builder.SetInsertPoint(InsertPt);
-
- LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, Align(1));
- Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
-
- // Move the pointer if there are more blocks to come.
- Value *NewPtr;
- if ((Idx + 1) != VectorWidth)
- NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
-
- // Create "else" block, fill it in the next iteration
- BasicBlock *NewIfBlock =
- CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
- Builder.SetInsertPoint(InsertPt);
- Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
- OldBr->eraseFromParent();
- BasicBlock *PrevIfBlock = IfBlock;
- IfBlock = NewIfBlock;
-
- // Create the phi to join the new and previous value.
- PHINode *ResultPhi = Builder.CreatePHI(VecType, 2, "res.phi.else");
- ResultPhi->addIncoming(NewVResult, CondBlock);
- ResultPhi->addIncoming(VResult, PrevIfBlock);
- VResult = ResultPhi;
-
- // Add a PHI for the pointer if this isn't the last iteration.
- if ((Idx + 1) != VectorWidth) {
- PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
- PtrPhi->addIncoming(NewPtr, CondBlock);
- PtrPhi->addIncoming(Ptr, PrevIfBlock);
- Ptr = PtrPhi;
- }
- }
-
- CI->replaceAllUsesWith(VResult);
- CI->eraseFromParent();
-
- ModifiedDT = true;
-}
-
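The ptr.phi.else chain above advances the load pointer only inside taken "cond" blocks, so memory is consumed contiguously while only active lanes receive a value. A minimal C++ sketch of the equivalent semantics (names invented for illustration):

#include <array>
#include <cstddef>

// Reference semantics of an expanding load: contiguous memory, but only
// active lanes take a value and advance MemIndex.
template <typename T, std::size_t N>
std::array<T, N> expandLoadRef(const T *Ptr, const std::array<bool, N> &Mask,
                               const std::array<T, N> &PassThru) {
  std::array<T, N> Res = PassThru;
  std::size_t MemIndex = 0; // advances only on active lanes
  for (std::size_t Idx = 0; Idx != N; ++Idx)
    if (Mask[Idx])
      Res[Idx] = Ptr[MemIndex++];
  return Res;
}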
-static void scalarizeMaskedCompressStore(CallInst *CI, bool &ModifiedDT) {
- Value *Src = CI->getArgOperand(0);
- Value *Ptr = CI->getArgOperand(1);
- Value *Mask = CI->getArgOperand(2);
-
- auto *VecType = cast<FixedVectorType>(Src->getType());
-
- IRBuilder<> Builder(CI->getContext());
- Instruction *InsertPt = CI;
- BasicBlock *IfBlock = CI->getParent();
-
- Builder.SetInsertPoint(InsertPt);
- Builder.SetCurrentDebugLocation(CI->getDebugLoc());
-
- Type *EltTy = VecType->getElementType();
-
- unsigned VectorWidth = VecType->getNumElements();
-
- // Take the fast path if the mask is a vector of constants.
- if (isConstantIntVector(Mask)) {
- unsigned MemIndex = 0;
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
- continue;
- Value *OneElt =
- Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
- Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
- Builder.CreateAlignedStore(OneElt, NewPtr, Align(1));
- ++MemIndex;
- }
- CI->eraseFromParent();
- return;
- }
-
- // If the mask is not v1i1, use scalar bit test operations. This generates
- // better results on X86 at least.
- Value *SclrMask;
- if (VectorWidth != 1) {
- Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
- SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
- }
-
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- // Fill the "else" block, created in the previous iteration
- //
- // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
- // br i1 %mask_1, label %cond.store, label %else
- //
- Value *Predicate;
- if (VectorWidth != 1) {
- Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
- Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
- Builder.getIntN(VectorWidth, 0));
- } else {
- Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
- }
-
- // Create "cond" block
- //
- // %OneElt = extractelement <16 x i32> %Src, i32 Idx
- // store i32 %OneElt, i32* %Ptr, align 1
- //
- BasicBlock *CondBlock =
- IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store");
- Builder.SetInsertPoint(InsertPt);
-
- Value *OneElt = Builder.CreateExtractElement(Src, Idx);
- Builder.CreateAlignedStore(OneElt, Ptr, Align(1));
-
- // Move the pointer if there are more blocks to come.
- Value *NewPtr;
- if ((Idx + 1) != VectorWidth)
- NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
-
- // Create "else" block, fill it in the next iteration
- BasicBlock *NewIfBlock =
- CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
- Builder.SetInsertPoint(InsertPt);
- Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
- OldBr->eraseFromParent();
- BasicBlock *PrevIfBlock = IfBlock;
- IfBlock = NewIfBlock;
-
- // Add a PHI for the pointer if this isn't the last iteration.
- if ((Idx + 1) != VectorWidth) {
- PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
- PtrPhi->addIncoming(NewPtr, CondBlock);
- PtrPhi->addIncoming(Ptr, PrevIfBlock);
- Ptr = PtrPhi;
- }
- }
- CI->eraseFromParent();
-
- ModifiedDT = true;
-}
-
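Compress store is the mirror image of expand load: active elements are packed into contiguous memory, which is why the ptr.phi.else above bumps the pointer only after a taken store. A hedged C++ reference sketch (invented names):

#include <array>
#include <cstddef>

// Reference semantics of a compressing store: active elements are written
// back-to-back; inactive lanes neither store nor consume memory.
template <typename T, std::size_t N>
void compressStoreRef(const std::array<T, N> &Src,
                      const std::array<bool, N> &Mask, T *Ptr) {
  std::size_t MemIndex = 0; // advances only on active lanes
  for (std::size_t Idx = 0; Idx != N; ++Idx)
    if (Mask[Idx])
      Ptr[MemIndex++] = Src[Idx];
}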
-bool ScalarizeMaskedMemIntrin::runOnFunction(Function &F) {
- bool EverMadeChange = false;
-
- TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- DL = &F.getParent()->getDataLayout();
-
- bool MadeChange = true;
- while (MadeChange) {
- MadeChange = false;
- for (Function::iterator I = F.begin(); I != F.end();) {
- BasicBlock *BB = &*I++;
- bool ModifiedDTOnIteration = false;
- MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration);
-
- // Restart BB iteration if the dominator tree of the Function was changed
- if (ModifiedDTOnIteration)
- break;
- }
-
- EverMadeChange |= MadeChange;
- }
-
- return EverMadeChange;
-}
-
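The driver below restarts the block walk whenever a scalarization split the CFG, since block iterators may be stale afterwards. A minimal sketch of that restart-until-fixpoint shape, with placeholder types standing in for basic blocks (an assumption for illustration, not the pass's API):

#include <functional>
#include <vector>

// Re-scan from the top whenever a transform invalidates iteration state,
// mirroring the ModifiedDTOnIteration restart in the real driver.
static bool runToFixpoint(std::vector<int> &Blocks,
                          const std::function<bool(int &, bool &)> &OptBlock) {
  bool Ever = false, Changed = true;
  while (Changed) {
    Changed = false;
    for (int &B : Blocks) {
      bool Invalidated = false;
      Changed |= OptBlock(B, Invalidated);
      if (Invalidated)
        break; // iterators may be stale after a CFG split; restart the walk
    }
    Ever |= Changed;
  }
  return Ever;
}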
-bool ScalarizeMaskedMemIntrin::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
- bool MadeChange = false;
-
- BasicBlock::iterator CurInstIterator = BB.begin();
- while (CurInstIterator != BB.end()) {
- if (CallInst *CI = dyn_cast<CallInst>(&*CurInstIterator++))
- MadeChange |= optimizeCallInst(CI, ModifiedDT);
- if (ModifiedDT)
- return true;
- }
-
- return MadeChange;
-}
-
-bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI,
- bool &ModifiedDT) {
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
- if (II) {
- switch (II->getIntrinsicID()) {
- default:
- break;
- case Intrinsic::masked_load:
- // Scalarize unsupported vector masked load
- if (TTI->isLegalMaskedLoad(
- CI->getType(),
- cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue()))
- return false;
- scalarizeMaskedLoad(CI, ModifiedDT);
- return true;
- case Intrinsic::masked_store:
- if (TTI->isLegalMaskedStore(
- CI->getArgOperand(0)->getType(),
- cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue()))
- return false;
- scalarizeMaskedStore(CI, ModifiedDT);
- return true;
- case Intrinsic::masked_gather: {
- unsigned AlignmentInt =
- cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
- Type *LoadTy = CI->getType();
- Align Alignment =
- DL->getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), LoadTy);
- if (TTI->isLegalMaskedGather(LoadTy, Alignment))
- return false;
- scalarizeMaskedGather(CI, ModifiedDT);
- return true;
- }
- case Intrinsic::masked_scatter: {
- unsigned AlignmentInt =
- cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
- Type *StoreTy = CI->getArgOperand(0)->getType();
- Align Alignment =
- DL->getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), StoreTy);
- if (TTI->isLegalMaskedScatter(StoreTy, Alignment))
- return false;
- scalarizeMaskedScatter(CI, ModifiedDT);
- return true;
- }
- case Intrinsic::masked_expandload:
- if (TTI->isLegalMaskedExpandLoad(CI->getType()))
- return false;
- scalarizeMaskedExpandLoad(CI, ModifiedDT);
- return true;
- case Intrinsic::masked_compressstore:
- if (TTI->isLegalMaskedCompressStore(CI->getArgOperand(0)->getType()))
- return false;
- scalarizeMaskedCompressStore(CI, ModifiedDT);
- return true;
- }
- }
-
- return false;
-}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 10da2d421797..5899da777fe9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -154,7 +154,7 @@ static bool getUnderlyingObjectsForInstr(const MachineInstr *MI,
Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias));
} else if (const Value *V = MMO->getValue()) {
SmallVector<Value *, 4> Objs;
- if (!getUnderlyingObjectsForCodeGen(V, Objs, DL))
+ if (!getUnderlyingObjectsForCodeGen(V, Objs))
return false;
for (Value *V : Objs) {
@@ -199,7 +199,10 @@ void ScheduleDAGInstrs::exitRegion() {
}
void ScheduleDAGInstrs::addSchedBarrierDeps() {
- MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : nullptr;
+ MachineInstr *ExitMI =
+ RegionEnd != BB->end()
+ ? &*skipDebugInstructionsBackward(RegionEnd, RegionBegin)
+ : nullptr;
ExitSU.setInstr(ExitMI);
// Add dependencies on the defs and uses of the instruction.
if (ExitMI) {
@@ -241,8 +244,6 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
!DefMIDesc->hasImplicitDefOfPhysReg(MO.getReg()));
for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
Alias.isValid(); ++Alias) {
- if (!Uses.contains(*Alias))
- continue;
for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) {
SUnit *UseSU = I->SU;
if (UseSU == SU)
@@ -513,6 +514,8 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
/// TODO: Handle ExitSU "uses" properly.
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
const MachineInstr *MI = SU->getInstr();
+ assert(!MI->isDebugInstr());
+
const MachineOperand &MO = MI->getOperand(OperIdx);
Register Reg = MO.getReg();
@@ -804,7 +807,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA,
DbgMI = nullptr;
}
- if (MI.isDebugValue()) {
+ if (MI.isDebugValue() || MI.isDebugRef()) {
DbgMI = &MI;
continue;
}
@@ -1184,7 +1187,7 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
else if (SU == &ExitSU)
oss << "<exit>";
else
- SU->getInstr()->print(oss, /*SkipOpers=*/true);
+ SU->getInstr()->print(oss, /*IsStandalone=*/true);
return oss.str();
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
index a113c30f851b..05b2a3764cca 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -35,7 +35,7 @@ namespace llvm {
return true;
}
- static bool isNodeHidden(const SUnit *Node) {
+ static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) {
return (Node->NumPreds > 10 || Node->NumSuccs > 10);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ec384d2a7c56..615bea2a4905 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -24,12 +24,14 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
@@ -410,9 +412,11 @@ namespace {
SDValue visitSUBO(SDNode *N);
SDValue visitADDE(SDNode *N);
SDValue visitADDCARRY(SDNode *N);
+ SDValue visitSADDO_CARRY(SDNode *N);
SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
SDValue visitSUBE(SDNode *N);
SDValue visitSUBCARRY(SDNode *N);
+ SDValue visitSSUBO_CARRY(SDNode *N);
SDValue visitMUL(SDNode *N);
SDValue visitMULFIX(SDNode *N);
SDValue useDivRem(SDNode *N);
@@ -464,6 +468,7 @@ namespace {
SDValue visitFREEZE(SDNode *N);
SDValue visitBUILD_PAIR(SDNode *N);
SDValue visitFADD(SDNode *N);
+ SDValue visitSTRICT_FADD(SDNode *N);
SDValue visitFSUB(SDNode *N);
SDValue visitFMUL(SDNode *N);
SDValue visitFMA(SDNode *N);
@@ -539,6 +544,7 @@ namespace {
SDValue convertSelectOfFPConstantsToLoadOffset(
const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
ISD::CondCode CC);
+ SDValue foldSignChangeInBitcast(SDNode *N);
SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
SDValue N2, SDValue N3, ISD::CondCode CC);
SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
@@ -586,7 +592,7 @@ namespace {
const SDLoc &DL);
SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
SDValue MatchLoadCombine(SDNode *N);
- SDValue MatchStoreCombine(StoreSDNode *N);
+ SDValue mergeTruncStores(StoreSDNode *N);
SDValue ReduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
SDValue splitMergedValStore(StoreSDNode *ST);
@@ -641,14 +647,18 @@ namespace {
// Classify the origin of a stored value.
enum class StoreSource { Unknown, Constant, Extract, Load };
StoreSource getStoreSource(SDValue StoreVal) {
- if (isa<ConstantSDNode>(StoreVal) || isa<ConstantFPSDNode>(StoreVal))
+ switch (StoreVal.getOpcode()) {
+ case ISD::Constant:
+ case ISD::ConstantFP:
return StoreSource::Constant;
- if (StoreVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
- StoreVal.getOpcode() == ISD::EXTRACT_SUBVECTOR)
+ case ISD::EXTRACT_VECTOR_ELT:
+ case ISD::EXTRACT_SUBVECTOR:
return StoreSource::Extract;
- if (isa<LoadSDNode>(StoreVal))
+ case ISD::LOAD:
return StoreSource::Load;
- return StoreSource::Unknown;
+ default:
+ return StoreSource::Unknown;
+ }
}
/// This is a helper function for visitMUL to check the profitability
@@ -752,9 +762,7 @@ namespace {
/// is legal or custom before legalizing operations, and whether is
/// legal (but not custom) after legalization.
bool hasOperation(unsigned Opcode, EVT VT) {
- if (LegalOperations)
- return TLI.isOperationLegal(Opcode, VT);
- return TLI.isOperationLegalOrCustom(Opcode, VT);
+ return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
}
public:
@@ -924,23 +932,40 @@ bool DAGCombiner::isOneUseSetCC(SDValue N) const {
return false;
}
-// Returns the SDNode if it is a constant float BuildVector
-// or constant float.
-static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
- if (isa<ConstantFPSDNode>(N))
- return N.getNode();
- if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
- return N.getNode();
- return nullptr;
+static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
+ if (!ScalarTy.isSimple())
+ return false;
+
+ uint64_t MaskForTy = 0ULL;
+ switch (ScalarTy.getSimpleVT().SimpleTy) {
+ case MVT::i8:
+ MaskForTy = 0xFFULL;
+ break;
+ case MVT::i16:
+ MaskForTy = 0xFFFFULL;
+ break;
+ case MVT::i32:
+ MaskForTy = 0xFFFFFFFFULL;
+ break;
+  default:
+    return false;
+ }
+
+ APInt Val;
+ if (ISD::isConstantSplatVector(N, Val))
+ return Val.getLimitedValue() == MaskForTy;
+
+ return false;
}
-// Determines if it is a constant integer or a build vector of constant
+// Determines if it is a constant integer or a splat/build vector of constant
// integers (and undefs).
// Do not permit build vector implicit truncation.
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
return !(Const->isOpaque() && NoOpaques);
- if (N.getOpcode() != ISD::BUILD_VECTOR)
+ if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
return false;
unsigned BitWidth = N.getScalarValueSizeInBits();
for (const SDValue &Op : N->op_values()) {
@@ -1554,9 +1579,15 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
DAG.ReplaceAllUsesWith(N, &RV);
}
- // Push the new node and any users onto the worklist
- AddToWorklist(RV.getNode());
- AddUsersToWorklist(RV.getNode());
+ // Push the new node and any users onto the worklist. Omit this if the
+ // new node is the EntryToken (e.g. if a store managed to get optimized
+ // out), because re-visiting the EntryToken and its users will not uncover
+ // any additional opportunities, but there may be a large number of such
+ // users, potentially causing compile time explosion.
+ if (RV.getOpcode() != ISD::EntryToken) {
+ AddToWorklist(RV.getNode());
+ AddUsersToWorklist(RV.getNode());
+ }
// Finally, if the node is now dead, remove it from the graph. The node
// may not be dead if the replacement process recursively simplified to
@@ -1589,8 +1620,10 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::USUBO: return visitSUBO(N);
case ISD::ADDE: return visitADDE(N);
case ISD::ADDCARRY: return visitADDCARRY(N);
+ case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
case ISD::SUBE: return visitSUBE(N);
case ISD::SUBCARRY: return visitSUBCARRY(N);
+ case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
@@ -1646,6 +1679,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::BITCAST: return visitBITCAST(N);
case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
case ISD::FADD: return visitFADD(N);
+ case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
case ISD::FSUB: return visitFSUB(N);
case ISD::FMUL: return visitFMUL(N);
case ISD::FMA: return visitFMA(N);
@@ -1805,6 +1839,10 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
if (OptLevel == CodeGenOpt::None)
return SDValue();
+ // Don't simplify the token factor if the node itself has too many operands.
+ if (N->getNumOperands() > TokenFactorInlineLimit)
+ return SDValue();
+
// If the sole user is a token factor, we should make sure we have a
// chance to merge them together. This prevents TF chains from inhibiting
// optimizations.
@@ -1890,7 +1928,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Re-mark any
    // search associated with it as being from the current OpNumber.
- if (SeenOps.count(Op) != 0) {
+ if (SeenOps.contains(Op)) {
Changed = true;
DidPruneOps = true;
unsigned OrigOpNumber = 0;
@@ -2002,6 +2040,62 @@ static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
}
+/// Return true if 'Use' is a load or a store that uses N as its base pointer
+/// and that N may be folded in the load / store addressing mode.
+static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ EVT VT;
+ unsigned AS;
+
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
+ if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
+ return false;
+ VT = LD->getMemoryVT();
+ AS = LD->getAddressSpace();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
+ if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
+ return false;
+ VT = ST->getMemoryVT();
+ AS = ST->getAddressSpace();
+ } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
+ if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
+ return false;
+ VT = LD->getMemoryVT();
+ AS = LD->getAddressSpace();
+ } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
+ if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
+ return false;
+ VT = ST->getMemoryVT();
+ AS = ST->getAddressSpace();
+ } else
+ return false;
+
+ TargetLowering::AddrMode AM;
+ if (N->getOpcode() == ISD::ADD) {
+ AM.HasBaseReg = true;
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (Offset)
+ // [reg +/- imm]
+ AM.BaseOffs = Offset->getSExtValue();
+ else
+ // [reg +/- reg]
+ AM.Scale = 1;
+ } else if (N->getOpcode() == ISD::SUB) {
+ AM.HasBaseReg = true;
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (Offset)
+ // [reg +/- imm]
+ AM.BaseOffs = -Offset->getSExtValue();
+ else
+ // [reg +/- reg]
+ AM.Scale = 1;
+ } else
+ return false;
+
+ return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
+ VT.getTypeForEVT(*DAG.getContext()), AS);
+}
+
SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
"Unexpected binary operator");
@@ -2021,12 +2115,12 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
SDValue CT = Sel.getOperand(1);
if (!isConstantOrConstantVector(CT, true) &&
- !isConstantFPBuildVectorOrConstantFP(CT))
+ !DAG.isConstantFPBuildVectorOrConstantFP(CT))
return SDValue();
SDValue CF = Sel.getOperand(2);
if (!isConstantOrConstantVector(CF, true) &&
- !isConstantFPBuildVectorOrConstantFP(CF))
+ !DAG.isConstantFPBuildVectorOrConstantFP(CF))
return SDValue();
// Bail out if any constants are opaque because we can't constant fold those.
@@ -2043,19 +2137,10 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
SDValue CBO = BO->getOperand(SelOpNo ^ 1);
if (!CanFoldNonConst &&
!isConstantOrConstantVector(CBO, true) &&
- !isConstantFPBuildVectorOrConstantFP(CBO))
+ !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
return SDValue();
- EVT VT = Sel.getValueType();
-
-  // In case of a shift, the value and the shift amount may have different VTs.
-  // For instance, on x86 the shift amount is i8 regardless of the LHS type.
-  // Bail out if we have swapped operands and the value types do not match.
-  // NB: x86 is fine if the operands are not swapped and the shift amount VT
-  // is no bigger than the shifted value's VT.
-  // TODO: it is possible to check for a shift operation, correct the VTs and
-  // still perform the optimization on x86 if needed.
- if (SelOpNo && VT != CBO.getValueType())
- return SDValue();
+ EVT VT = BO->getValueType(0);
// We have a select-of-constants followed by a binary operator with a
// constant. Eliminate the binop by pulling the constant math into the select.
@@ -2065,14 +2150,14 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
: DAG.getNode(BinOpcode, DL, VT, CT, CBO);
if (!CanFoldNonConst && !NewCT.isUndef() &&
!isConstantOrConstantVector(NewCT, true) &&
- !isConstantFPBuildVectorOrConstantFP(NewCT))
+ !DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
return SDValue();
SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
: DAG.getNode(BinOpcode, DL, VT, CF, CBO);
if (!CanFoldNonConst && !NewCF.isUndef() &&
!isConstantOrConstantVector(NewCF, true) &&
- !isConstantFPBuildVectorOrConstantFP(NewCF))
+ !DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
return SDValue();
SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
@@ -2402,8 +2487,8 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
// Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
- APInt C0 = N0->getConstantOperandAPInt(0);
- APInt C1 = N1->getConstantOperandAPInt(0);
+ const APInt &C0 = N0->getConstantOperandAPInt(0);
+ const APInt &C1 = N1->getConstantOperandAPInt(0);
return DAG.getVScale(DL, VT, C0 + C1);
}
@@ -2411,9 +2496,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if ((N0.getOpcode() == ISD::ADD) &&
(N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
(N1.getOpcode() == ISD::VSCALE)) {
- auto VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
- auto VS1 = N1->getConstantOperandAPInt(0);
- auto VS = DAG.getVScale(DL, VT, VS0 + VS1);
+ const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
+ const APInt &VS1 = N1->getConstantOperandAPInt(0);
+ SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
}
@@ -2631,36 +2716,18 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
return SDValue();
}
-static SDValue flipBoolean(SDValue V, const SDLoc &DL,
- SelectionDAG &DAG, const TargetLowering &TLI) {
- EVT VT = V.getValueType();
-
- SDValue Cst;
- switch (TLI.getBooleanContents(VT)) {
- case TargetLowering::ZeroOrOneBooleanContent:
- case TargetLowering::UndefinedBooleanContent:
- Cst = DAG.getConstant(1, DL, VT);
- break;
- case TargetLowering::ZeroOrNegativeOneBooleanContent:
- Cst = DAG.getAllOnesConstant(DL, VT);
- break;
- }
-
- return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
-}
-
/**
 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
* then the flip also occurs if computing the inverse is the same cost.
* This function returns an empty SDValue in case it cannot flip the boolean
* without increasing the cost of the computation. If you want to flip a boolean
- * no matter what, use flipBoolean.
+ * no matter what, use DAG.getLogicalNOT.
*/
static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
const TargetLowering &TLI,
bool Force) {
if (Force && isa<ConstantSDNode>(V))
- return flipBoolean(V, SDLoc(V), DAG, TLI);
+ return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
if (V.getOpcode() != ISD::XOR)
return SDValue();
@@ -2687,7 +2754,7 @@ static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
if (IsFlip)
return V.getOperand(0);
if (Force)
- return flipBoolean(V, SDLoc(V), DAG, TLI);
+ return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
return SDValue();
}
@@ -2724,8 +2791,8 @@ SDValue DAGCombiner::visitADDO(SDNode *N) {
if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
DAG.getConstant(0, DL, VT), N0.getOperand(0));
- return CombineTo(N, Sub,
- flipBoolean(Sub.getValue(1), DL, DAG, TLI));
+ return CombineTo(
+ N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
}
if (SDValue Combined = visitUADDOLike(N0, N1, N))
@@ -2820,6 +2887,28 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+ SDLoc DL(N);
+
+ // canonicalize constant to RHS
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
+
+ // fold (saddo_carry x, y, false) -> (saddo x, y)
+ if (isNullConstant(CarryIn)) {
+ if (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
+ return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
+ }
+
+ return SDValue();
+}
+
/**
 * If we are facing some sort of diamond carry propagation pattern, try to
* break it up to generate something like:
@@ -3005,8 +3094,8 @@ SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
SDLoc DL(N);
SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
N0.getOperand(0), NotC);
- return CombineTo(N, Sub,
- flipBoolean(Sub.getValue(1), DL, DAG, TLI));
+ return CombineTo(
+ N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
}
// Iff the flag result is dead:
@@ -3111,6 +3200,13 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// 0 - X --> X if X is 0 or the minimum signed value.
return N1;
}
+
+ // Convert 0 - abs(x).
+ SDValue Result;
+ if (N1->getOpcode() == ISD::ABS &&
+ !TLI.isOperationLegalOrCustom(ISD::ABS, VT) &&
+ TLI.expandABS(N1.getNode(), Result, DAG, true))
+ return Result;
}
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
@@ -3306,12 +3402,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
SDValue S0 = N1.getOperand(0);
- if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
- unsigned OpSizeInBits = VT.getScalarSizeInBits();
+ if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
- if (C->getAPIntValue() == (OpSizeInBits - 1))
+ if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
- }
}
}
@@ -3342,7 +3436,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
if (N1.getOpcode() == ISD::VSCALE) {
- APInt IntVal = N1.getConstantOperandAPInt(0);
+ const APInt &IntVal = N1.getConstantOperandAPInt(0);
return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
}
@@ -3501,6 +3595,21 @@ SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+
+ // fold (ssubo_carry x, y, false) -> (ssubo x, y)
+ if (isNullConstant(CarryIn)) {
+ if (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
+ return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
+ }
+
+ return SDValue();
+}
+
// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
// UMULFIXSAT here.
SDValue DAGCombiner::visitMULFIX(SDNode *N) {
@@ -3606,19 +3715,30 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
getShiftAmountTy(N0.getValueType()))));
}
- // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
+ // Try to transform:
+ // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
// mul x, (2^N + 1) --> add (shl x, N), x
// mul x, (2^N - 1) --> sub (shl x, N), x
// Examples: x * 33 --> (x << 5) + x
// x * 15 --> (x << 4) - x
// x * -33 --> -((x << 5) + x)
// x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
+ // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
+ // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
+ // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
+ // Examples: x * 0x8800 --> (x << 15) + (x << 11)
+ // x * 0xf800 --> (x << 16) - (x << 11)
+ // x * -0x8800 --> -((x << 15) + (x << 11))
+ // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
// TODO: We could handle more general decomposition of any constant by
// having the target set a limit on number of ops and making a
// callback to determine that sequence (similar to sqrt expansion).
unsigned MathOp = ISD::DELETED_NODE;
APInt MulC = ConstValue1.abs();
+ // The constant `2` should be treated as (2^0 + 1).
+ unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
+ MulC.lshrInPlace(TZeros);
if ((MulC - 1).isPowerOf2())
MathOp = ISD::ADD;
else if ((MulC + 1).isPowerOf2())
@@ -3627,12 +3747,17 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
if (MathOp != ISD::DELETED_NODE) {
unsigned ShAmt =
MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
+ ShAmt += TZeros;
assert(ShAmt < VT.getScalarSizeInBits() &&
"multiply-by-constant generated out of bounds shift");
SDLoc DL(N);
SDValue Shl =
DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
- SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
+ SDValue R =
+ TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
+ DAG.getNode(ISD::SHL, DL, VT, N0,
+ DAG.getConstant(TZeros, DL, VT)))
+ : DAG.getNode(MathOp, DL, VT, Shl, N0);
if (ConstValue1.isNegative())
R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
return R;
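The decomposition handled in the hunk above is plain modular arithmetic; a quick standalone C++ check of the worked examples from the comment (the test values are chosen arbitrarily):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x : {1u, 3u, 12345u, 0xdeadbeefu}) {
    assert(x * 33u == (x << 5) + x);              // 2^5 + 1
    assert(x * 15u == (x << 4) - x);              // 2^4 - 1
    assert(x * 0x8800u == (x << 15) + (x << 11)); // 2^15 + 2^11
    assert(x * 0xf800u == (x << 16) - (x << 11)); // 2^16 - 2^11
  }
}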
@@ -3684,11 +3809,42 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
if (N0.getOpcode() == ISD::VSCALE)
if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
- APInt C0 = N0.getConstantOperandAPInt(0);
- APInt C1 = NC1->getAPIntValue();
+ const APInt &C0 = N0.getConstantOperandAPInt(0);
+ const APInt &C1 = NC1->getAPIntValue();
return DAG.getVScale(SDLoc(N), VT, C0 * C1);
}
+  // Fold (mul x, 0/undef) -> 0 and (mul x, 1) -> x.
+  // A vector of '0' and '1' factors can therefore be replaced with a
+  // clearing mask: (mul x, <0,1,...>) -> and(x, <0,-1,...>)
+ if (VT.isFixedLengthVector()) {
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallBitVector ClearMask;
+ ClearMask.reserve(NumElts);
+ auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
+ if (!V || V->isNullValue()) {
+ ClearMask.push_back(true);
+ return true;
+ }
+ ClearMask.push_back(false);
+ return V->isOne();
+ };
+ if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
+ ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
+ assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
+ SDLoc DL(N);
+ EVT LegalSVT = N1.getOperand(0).getValueType();
+ SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
+ SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
+ SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
+ for (unsigned I = 0; I != NumElts; ++I)
+ if (ClearMask[I])
+ Mask[I] = Zero;
+ return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
+ }
+ }
+
// reassociate mul
if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
return RMUL;
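The clearing-mask fold in the hunk above relies on multiply-by-0/1 matching AND-with-0/all-ones per lane; a scalar C++ spot check (demo values only):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x : {0u, 7u, 0xffffffffu}) {
    assert(x * 0u == (x & 0u));          // '0' factor -> cleared lane
    assert(x * 1u == (x & 0xffffffffu)); // '1' factor -> all-ones mask lane
  }
}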
@@ -4108,9 +4264,9 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
} else {
- SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
if (DAG.isKnownToBeAPowerOfTwo(N1)) {
// fold (urem x, pow2) -> (and x, pow2-1)
+ SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
AddToWorklist(Add.getNode());
return DAG.getNode(ISD::AND, DL, VT, N0, Add);
@@ -4118,6 +4274,7 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
if (N1.getOpcode() == ISD::SHL &&
DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
// fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
+ SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
AddToWorklist(Add.getNode());
return DAG.getNode(ISD::AND, DL, VT, N0, Add);
@@ -4186,7 +4343,8 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
// If the type twice as wide is legal, transform the mulhs to a wider multiply
// plus a shift.
- if (!TLI.isMulhCheaperThanMulShift(VT) && VT.isSimple() && !VT.isVector()) {
+ if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
+ !VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
@@ -4242,7 +4400,8 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
// If the type twice as wide is legal, transform the mulhu to a wider multiply
// plus a shift.
- if (!TLI.isMulhCheaperThanMulShift(VT) && VT.isSimple() && !VT.isVector()) {
+ if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
+ !VT.isVector()) {
MVT Simple = VT.getSimpleVT();
unsigned SimpleSize = Simple.getSizeInBits();
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
@@ -4448,6 +4607,10 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
}
+ // Simplify the operands using demanded-bits information.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
return SDValue();
}
@@ -4916,8 +5079,15 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
if (!LDST->isSimple())
return false;
+ EVT LdStMemVT = LDST->getMemoryVT();
+
+ // Bail out when changing the scalable property, since we can't be sure that
+ // we're actually narrowing here.
+ if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
+ return false;
+
// Verify that we are actually reducing a load width here.
- if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
+ if (LdStMemVT.bitsLT(MemVT))
return false;
// Ensure that this isn't going to produce an unsupported memory access.
@@ -5272,6 +5442,31 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return N1;
if (ISD::isBuildVectorAllOnes(N1.getNode()))
return N0;
+
+ // fold (and (masked_load) (build_vec (x, ...))) to zext_masked_load
+ auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
+ auto *BVec = dyn_cast<BuildVectorSDNode>(N1);
+ if (MLoad && BVec && MLoad->getExtensionType() == ISD::EXTLOAD &&
+ N0.hasOneUse() && N1.hasOneUse()) {
+ EVT LoadVT = MLoad->getMemoryVT();
+ EVT ExtVT = VT;
+ if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
+      // For this AND to be a zero extension of the masked load, the elements
+      // of the BuildVec must mask the bottom bits of the extended element
+      // type.
+ if (ConstantSDNode *Splat = BVec->getConstantSplatNode()) {
+ uint64_t ElementSize =
+ LoadVT.getVectorElementType().getScalarSizeInBits();
+ if (Splat->getAPIntValue().isMask(ElementSize)) {
+ return DAG.getMaskedLoad(
+ ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
+ MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
+ LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
+ ISD::ZEXTLOAD, MLoad->isExpandingLoad());
+ }
+ }
+ }
+ }
}
// fold (and c1, c2) -> c1&c2
@@ -5440,6 +5635,28 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
}
+ // fold (and (masked_gather x)) -> (zext_masked_gather x)
+ if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
+ EVT MemVT = GN0->getMemoryVT();
+ EVT ScalarVT = MemVT.getScalarType();
+
+ if (SDValue(GN0, 0).hasOneUse() &&
+ isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
+        TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
+ SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
+ GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
+
+ SDValue ZExtLoad = DAG.getMaskedGather(
+ DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops,
+ GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD);
+
+ CombineTo(N, ZExtLoad);
+ AddToWorklist(ZExtLoad.getNode());
+ // Avoid recheck of N.
+ return SDValue(N, 0);
+ }
+ }
+
// fold (and (load x), 255) -> (zextload x, i8)
// fold (and (extload x, i16), 255) -> (zextload x, i8)
// fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
@@ -5534,6 +5751,31 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
return V;
+ // Recognize the following pattern:
+ //
+ // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
+ //
+  // where bitmask is a low-bit mask exactly as wide as NarrowVT, i.e. it
+  // clears precisely the upper bits that the sign extension filled in.
+ auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
+ if (LHS->getOpcode() != ISD::SIGN_EXTEND)
+ return false;
+
+ auto *C = dyn_cast<ConstantSDNode>(RHS);
+ if (!C)
+ return false;
+
+ if (!C->getAPIntValue().isMask(
+ LHS.getOperand(0).getValueType().getFixedSizeInBits()))
+ return false;
+
+ return true;
+ };
+
+ // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
+ if (IsAndZeroExtMask(N0, N1))
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
+
return SDValue();
}
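The sign-extend-plus-mask rewrite above holds because masking off the copied sign bits leaves exactly a zero extension; a standalone C++ check on i16 -> i32 (sample values only):

#include <cassert>
#include <cstdint>

int main() {
  for (int16_t x : {int16_t(-1), int16_t(-32768), int16_t(42)}) {
    uint32_t viaAnd = uint32_t(int32_t(x)) & 0xFFFFu; // sign_extend then mask
    uint32_t viaZext = uint32_t(uint16_t(x));         // zero_extend directly
    assert(viaAnd == viaZext);
  }
}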
@@ -6782,11 +7024,11 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
return None;
}
-static unsigned LittleEndianByteAt(unsigned BW, unsigned i) {
+static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
return i;
}
-static unsigned BigEndianByteAt(unsigned BW, unsigned i) {
+static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
return BW - i - 1;
}
@@ -6803,8 +7045,8 @@ static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
bool BigEndian = true, LittleEndian = true;
for (unsigned i = 0; i < Width; i++) {
int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
- LittleEndian &= CurrentByteOffset == LittleEndianByteAt(Width, i);
- BigEndian &= CurrentByteOffset == BigEndianByteAt(Width, i);
+ LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
+ BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
if (!BigEndian && !LittleEndian)
return None;
}
@@ -6847,80 +7089,90 @@ static SDValue stripTruncAndExt(SDValue Value) {
/// p[3] = (val >> 0) & 0xFF;
/// =>
/// *((i32)p) = BSWAP(val);
-SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
+SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
+ // The matching looks for "store (trunc x)" patterns that appear early but are
+ // likely to be replaced by truncating store nodes during combining.
+ // TODO: If there is evidence that running this later would help, this
+ // limitation could be removed. Legality checks may need to be added
+ // for the created store and optional bswap/rotate.
+ if (LegalOperations)
+ return SDValue();
+
// Collect all the stores in the chain.
SDValue Chain;
SmallVector<StoreSDNode *, 8> Stores;
for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
// TODO: Allow unordered atomics when wider type is legal (see D66309)
- if (Store->getMemoryVT() != MVT::i8 ||
+ EVT MemVT = Store->getMemoryVT();
+ if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
!Store->isSimple() || Store->isIndexed())
return SDValue();
Stores.push_back(Store);
Chain = Store->getChain();
}
- // Handle the simple type only.
- unsigned Width = Stores.size();
- EVT VT = EVT::getIntegerVT(
- *DAG.getContext(), Width * N->getMemoryVT().getSizeInBits());
- if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
+ // There is no reason to continue if we do not have at least a pair of stores.
+ if (Stores.size() < 2)
return SDValue();
- if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT))
+ // Handle simple types only.
+ LLVMContext &Context = *DAG.getContext();
+ unsigned NumStores = Stores.size();
+ unsigned NarrowNumBits = N->getMemoryVT().getScalarSizeInBits();
+ unsigned WideNumBits = NumStores * NarrowNumBits;
+ EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
+ if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
return SDValue();
- // Check if all the bytes of the combined value we are looking at are stored
- // to the same base address. Collect bytes offsets from Base address into
- // ByteOffsets.
- SDValue CombinedValue;
- SmallVector<int64_t, 8> ByteOffsets(Width, INT64_MAX);
+ // Check if all bytes of the source value that we are looking at are stored
+ // to the same base address. Collect offsets from Base address into OffsetMap.
+ SDValue SourceValue;
+ SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
int64_t FirstOffset = INT64_MAX;
StoreSDNode *FirstStore = nullptr;
Optional<BaseIndexOffset> Base;
for (auto Store : Stores) {
- // All the stores store different byte of the CombinedValue. A truncate is
- // required to get that byte value.
+  // All the stores store different parts of the SourceValue. A truncate is
+ // required to get the partial value.
SDValue Trunc = Store->getValue();
if (Trunc.getOpcode() != ISD::TRUNCATE)
return SDValue();
- // A shift operation is required to get the right byte offset, except the
- // first byte.
+ // Other than the first/last part, a shift operation is required to get the
+ // offset.
int64_t Offset = 0;
- SDValue Value = Trunc.getOperand(0);
- if (Value.getOpcode() == ISD::SRL ||
- Value.getOpcode() == ISD::SRA) {
- auto *ShiftOffset = dyn_cast<ConstantSDNode>(Value.getOperand(1));
- // Trying to match the following pattern. The shift offset must be
- // a constant and a multiple of 8. It is the byte offset in "y".
+ SDValue WideVal = Trunc.getOperand(0);
+ if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
+ isa<ConstantSDNode>(WideVal.getOperand(1))) {
+ // The shift amount must be a constant multiple of the narrow type.
+ // It is translated to the offset address in the wide source value "y".
//
- // x = srl y, offset
+ // x = srl y, ShiftAmtC
// i8 z = trunc x
// store z, ...
- if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8))
+ uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
+ if (ShiftAmtC % NarrowNumBits != 0)
return SDValue();
- Offset = ShiftOffset->getSExtValue()/8;
- Value = Value.getOperand(0);
+ Offset = ShiftAmtC / NarrowNumBits;
+ WideVal = WideVal.getOperand(0);
}
- // Stores must share the same combined value with different offsets.
- if (!CombinedValue)
- CombinedValue = Value;
- else if (stripTruncAndExt(CombinedValue) != stripTruncAndExt(Value))
+ // Stores must share the same source value with different offsets.
+ // Truncate and extends should be stripped to get the single source value.
+ if (!SourceValue)
+ SourceValue = WideVal;
+ else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
return SDValue();
-
- // The trunc and all the extend operation should be stripped to get the
- // real value we are stored.
- else if (CombinedValue.getValueType() != VT) {
- if (Value.getValueType() == VT ||
- Value.getValueSizeInBits() > CombinedValue.getValueSizeInBits())
- CombinedValue = Value;
- // Give up if the combined value type is smaller than the store size.
- if (CombinedValue.getValueSizeInBits() < VT.getSizeInBits())
+ else if (SourceValue.getValueType() != WideVT) {
+ if (WideVal.getValueType() == WideVT ||
+ WideVal.getScalarValueSizeInBits() >
+ SourceValue.getScalarValueSizeInBits())
+ SourceValue = WideVal;
+ // Give up if the source value type is smaller than the store size.
+ if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
return SDValue();
}
- // Stores must share the same base address
+ // Stores must share the same base address.
BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
int64_t ByteOffsetFromBase = 0;
if (!Base)
@@ -6928,60 +7180,78 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
return SDValue();
- // Remember the first byte store
+ // Remember the first store.
if (ByteOffsetFromBase < FirstOffset) {
FirstStore = Store;
FirstOffset = ByteOffsetFromBase;
}
// Map the offset in the store and the offset in the combined value, and
// early return if it has been set before.
- if (Offset < 0 || Offset >= Width || ByteOffsets[Offset] != INT64_MAX)
+ if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
return SDValue();
- ByteOffsets[Offset] = ByteOffsetFromBase;
+ OffsetMap[Offset] = ByteOffsetFromBase;
}
assert(FirstOffset != INT64_MAX && "First byte offset must be set");
assert(FirstStore && "First store must be set");
- // Check if the bytes of the combined value we are looking at match with
- // either big or little endian value store.
- Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
- if (!IsBigEndian.hasValue())
- return SDValue();
-
- // The node we are looking at matches with the pattern, check if we can
- // replace it with a single bswap if needed and store.
-
- // If the store needs byte swap check if the target supports it
- bool NeedsBswap = DAG.getDataLayout().isBigEndian() != *IsBigEndian;
-
- // Before legalize we can introduce illegal bswaps which will be later
- // converted to an explicit bswap sequence. This way we end up with a single
- // store and byte shuffling instead of several stores and byte shuffling.
- if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
- return SDValue();
-
// Check that a store of the wide type is both allowed and fast on the target
+ const DataLayout &Layout = DAG.getDataLayout();
bool Fast = false;
- bool Allowed =
- TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
- *FirstStore->getMemOperand(), &Fast);
+ bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
+ *FirstStore->getMemOperand(), &Fast);
if (!Allowed || !Fast)
return SDValue();
- if (VT != CombinedValue.getValueType()) {
- assert(CombinedValue.getValueType().getSizeInBits() > VT.getSizeInBits() &&
- "Get unexpected store value to combine");
- CombinedValue = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT,
- CombinedValue);
+ // Check if the pieces of the value are going to the expected places in memory
+ // to merge the stores.
+ auto checkOffsets = [&](bool MatchLittleEndian) {
+ if (MatchLittleEndian) {
+ for (unsigned i = 0; i != NumStores; ++i)
+ if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
+ return false;
+ } else { // MatchBigEndian by reversing loop counter.
+ for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
+ if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
+ return false;
+ }
+ return true;
+ };
+
+ // Check if the offsets line up for the native data layout of this target.
+ bool NeedBswap = false;
+ bool NeedRotate = false;
+ if (!checkOffsets(Layout.isLittleEndian())) {
+ // Special-case: check if byte offsets line up for the opposite endian.
+ if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
+ NeedBswap = true;
+ else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
+ NeedRotate = true;
+ else
+ return SDValue();
}
- if (NeedsBswap)
- CombinedValue = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, CombinedValue);
+ SDLoc DL(N);
+ if (WideVT != SourceValue.getValueType()) {
+ assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
+ "Unexpected store value to merge");
+ SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
+ }
+
+ // Before legalize we can introduce illegal bswaps/rotates which will later be
+ // converted to an explicit bswap/rotate sequence. This way we end up with a
+ // single store and byte shuffling instead of several stores and byte shuffling.
+ if (NeedBswap) {
+ SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
+ } else if (NeedRotate) {
+ assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
+ SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
+ SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
+ }
SDValue NewStore =
- DAG.getStore(Chain, SDLoc(N), CombinedValue, FirstStore->getBasePtr(),
- FirstStore->getPointerInfo(), FirstStore->getAlignment());
+ DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
+ FirstStore->getPointerInfo(), FirstStore->getAlign());
// Rely on other DAG combine rules to remove the other individual stores.
DAG.ReplaceAllUsesWith(N, NewStore.getNode());
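// Editor's illustration (not part of this patch): a standalone sketch of why
// reversed narrow stores can be replaced by one wide store of BSWAP(v) (byte
// stores) or ROTR(v, width/2) (two half-width stores). Assumes a
// little-endian host and GCC/Clang's __builtin_bswap32.
#include <cassert>
#include <cstdint>
#include <cstring>

static uint32_t rotr32(uint32_t v, unsigned n) {
  return (v >> n) | (v << (32 - n));
}

int main() {
  uint32_t v = 0x11223344;
  unsigned char a[4], b[4];

  // Four i8 stores laid out most-significant byte first...
  for (int i = 0; i < 4; ++i)
    a[i] = static_cast<unsigned char>(v >> (24 - 8 * i));
  // ...equal one i32 store of bswap(v) on a little-endian host.
  uint32_t Swapped = __builtin_bswap32(v);
  std::memcpy(b, &Swapped, 4);
  assert(std::memcmp(a, b, 4) == 0);

  // Two i16 halves stored in swapped positions...
  uint16_t Lo = static_cast<uint16_t>(v), Hi = static_cast<uint16_t>(v >> 16);
  std::memcpy(a, &Hi, 2);
  std::memcpy(a + 2, &Lo, 2);
  // ...equal one i32 store of rotr(v, 16).
  uint32_t Rotated = rotr32(v, 16);
  std::memcpy(b, &Rotated, 4);
  assert(std::memcmp(a, b, 4) == 0);
  return 0;
}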
@@ -7036,8 +7306,8 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
"can only analyze providers for individual bytes not bit");
unsigned LoadByteWidth = LoadBitWidth / 8;
return IsBigEndianTarget
- ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
- : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
+ ? bigEndianByteAt(LoadByteWidth, P.ByteOffset)
+ : littleEndianByteAt(LoadByteWidth, P.ByteOffset);
};
Optional<BaseIndexOffset> Base;
@@ -7164,10 +7434,10 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
if (!Allowed || !Fast)
return SDValue();
- SDValue NewLoad = DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
- SDLoc(N), VT, Chain, FirstLoad->getBasePtr(),
- FirstLoad->getPointerInfo(), MemVT,
- FirstLoad->getAlignment());
+ SDValue NewLoad =
+ DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
+ Chain, FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
// Transfer chain users from old loads to the new load.
for (LoadSDNode *L : Loads)
@@ -7337,9 +7607,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (N0.hasOneUse()) {
// FIXME Can we handle multiple uses? Could we token factor the chain
// results from the new/old setcc?
- SDValue SetCC = DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
- N0.getOperand(0),
- N0Opcode == ISD::STRICT_FSETCCS);
+ SDValue SetCC =
+ DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
+ N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
CombineTo(N, SetCC);
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
recursivelyDeleteUnusedNodes(N0.getNode());
@@ -7440,12 +7710,10 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
SDValue S0 = S.getOperand(0);
- if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
- unsigned OpSizeInBits = VT.getScalarSizeInBits();
+ if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
- if (C->getAPIntValue() == (OpSizeInBits - 1))
+ if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
return DAG.getNode(ISD::ABS, DL, VT, S0);
- }
}
}
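// Editor's illustration (not part of this patch): exhaustive i8 check of the
// identity behind the fold above, (X + (X >>s 7)) ^ (X >>s 7) == abs(X),
// assuming two's-complement wraparound (C++20 conversion semantics).
#include <cassert>
#include <cstdint>

int main() {
  for (int x = -128; x <= 127; ++x) {
    int8_t X = static_cast<int8_t>(x);
    int8_t S = static_cast<int8_t>(X >> 7);           // sra by bitwidth-1: 0 or -1
    int8_t Add = static_cast<int8_t>(X + S);
    int8_t Abs = static_cast<int8_t>(Add ^ S);
    int8_t Ref = static_cast<int8_t>(x < 0 ? -x : x); // wraps for x == -128
    assert(Abs == Ref);
  }
  return 0;
}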
@@ -7980,10 +8248,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
if (N0.getOpcode() == ISD::VSCALE)
if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
- auto DL = SDLoc(N);
- APInt C0 = N0.getConstantOperandAPInt(0);
- APInt C1 = NC1->getAPIntValue();
- return DAG.getVScale(DL, VT, C0 << C1);
+ const APInt &C0 = N0.getConstantOperandAPInt(0);
+ const APInt &C1 = NC1->getAPIntValue();
+ return DAG.getVScale(SDLoc(N), VT, C0 << C1);
}
return SDValue();
@@ -8032,12 +8299,6 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
if (NarrowVT != RightOp.getOperand(0).getValueType())
return SDValue();
- // Only transform into mulh if mulh for the narrow type is cheaper than
- // a multiply followed by a shift. This should also check if mulh is
- // legal for NarrowVT on the target.
- if (!TLI.isMulhCheaperThanMulShift(NarrowVT))
- return SDValue();
-
// Proceed with the transformation if the wide type is twice as large
// as the narrow type.
unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
@@ -8055,6 +8316,10 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
// we use mulhs. Otherwise, zero extends (zext) use mulhu.
unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
+ // Combine to mulh if mulh is legal/custom for the narrow type on the target.
+ if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
+ return SDValue();
+
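// Editor's illustration (not part of this patch): what MULHS/MULHU compute --
// the high half of a widened multiply -- sketched for i16 with an i32 widening
// (values chosen so the expected halves are easy to verify by hand).
#include <cassert>
#include <cstdint>

static int16_t mulhs16(int16_t a, int16_t b) {
  return static_cast<int16_t>((static_cast<int32_t>(a) * b) >> 16);
}
static uint16_t mulhu16(uint16_t a, uint16_t b) {
  return static_cast<uint16_t>((static_cast<uint32_t>(a) * b) >> 16);
}

int main() {
  // sext widening: -300 * 500 = -150000 = 0xFFFDB610; high half 0xFFFD = -3.
  assert(mulhs16(-300, 500) == -3);
  // zext widening: 60000 * 50000 = 3000000000; high half is 45776.
  assert(mulhu16(60000, 50000) == 45776);
  return 0;
}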
SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0),
RightOp.getOperand(0));
return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1)
@@ -8556,8 +8821,8 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
RHS->getAddressSpace(), NewAlign,
RHS->getMemOperand()->getFlags(), &Fast) &&
Fast) {
- SDValue NewPtr =
- DAG.getMemBasePlusOffset(RHS->getBasePtr(), PtrOff, DL);
+ SDValue NewPtr = DAG.getMemBasePlusOffset(
+ RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL);
AddToWorklist(NewPtr.getNode());
SDValue Load = DAG.getLoad(
VT, DL, RHS->getChain(), NewPtr,
@@ -9154,16 +9419,75 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
}
+bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
+ if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD)
+ return false;
+
+ // For now we check only the LHS of the add.
+ SDValue LHS = Index.getOperand(0);
+ SDValue SplatVal = DAG.getSplatValue(LHS);
+ if (!SplatVal)
+ return false;
+
+ BasePtr = SplatVal;
+ Index = Index.getOperand(1);
+ return true;
+}
+
+// Fold sext/zext of index into index type.
+bool refineIndexType(MaskedGatherScatterSDNode *MGS, SDValue &Index,
+ bool Scaled, SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ if (Index.getOpcode() == ISD::ZERO_EXTEND) {
+ SDValue Op = Index.getOperand(0);
+ MGS->setIndexType(Scaled ? ISD::UNSIGNED_SCALED : ISD::UNSIGNED_UNSCALED);
+ if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
+ Index = Op;
+ return true;
+ }
+ }
+
+ if (Index.getOpcode() == ISD::SIGN_EXTEND) {
+ SDValue Op = Index.getOperand(0);
+ MGS->setIndexType(Scaled ? ISD::SIGNED_SCALED : ISD::SIGNED_UNSCALED);
+ if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
+ Index = Op;
+ return true;
+ }
+ }
+
+ return false;
+}
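// Editor's illustration (not part of this patch): a gather/scatter lane
// address is Base + Index[i] * Scale. With Base == 0 and Index == splat(P) + V,
// the rewrite above moves the splat into the base. A minimal sketch assuming
// Scale == 1 (the index elements, not the base, are the scaled operands).
#include <cassert>
#include <cstdint>

int main() {
  const int64_t Splat = 0x1000;               // splatted LHS of the ADD
  const int64_t Offsets[4] = {0, 8, 16, 24};  // remaining index vector

  for (int i = 0; i < 4; ++i) {
    int64_t Before = 0 + (Splat + Offsets[i]); // BasePtr = 0, Index = splat+V
    int64_t After = Splat + Offsets[i];        // BasePtr = splat, Index = V
    assert(Before == After);
  }
  return 0;
}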
+
SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
SDValue Mask = MSC->getMask();
SDValue Chain = MSC->getChain();
+ SDValue Index = MSC->getIndex();
+ SDValue Scale = MSC->getScale();
+ SDValue StoreVal = MSC->getValue();
+ SDValue BasePtr = MSC->getBasePtr();
SDLoc DL(N);
// Zap scatters with a zero mask.
if (ISD::isBuildVectorAllZeros(Mask.getNode()))
return Chain;
+ if (refineUniformBase(BasePtr, Index, DAG)) {
+ SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
+ return DAG.getMaskedScatter(
+ DAG.getVTList(MVT::Other), StoreVal.getValueType(), DL, Ops,
+ MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
+ }
+
+ if (refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG)) {
+ SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
+ return DAG.getMaskedScatter(
+ DAG.getVTList(MVT::Other), StoreVal.getValueType(), DL, Ops,
+ MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
+ }
+
return SDValue();
}
@@ -9177,6 +9501,14 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
if (ISD::isBuildVectorAllZeros(Mask.getNode()))
return Chain;
+ // If this is a masked store with an all-ones mask, we can use an unmasked store.
+ // FIXME: Can we do this for indexed, compressing, or truncating stores?
+ if (ISD::isBuildVectorAllOnes(Mask.getNode()) &&
+ MST->isUnindexed() && !MST->isCompressingStore() &&
+ !MST->isTruncatingStore())
+ return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
+ MST->getBasePtr(), MST->getMemOperand());
+
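// Editor's illustration (not part of this patch): lane semantics of an
// unindexed, non-compressing, non-truncating masked store. With an all-ones
// mask every lane is written, so it is observably a plain vector store.
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint32_t Mem[4] = {9, 9, 9, 9};
  const uint32_t Val[4] = {1, 2, 3, 4};
  const bool Mask[4] = {true, true, true, true};

  for (int i = 0; i < 4; ++i)
    if (Mask[i])
      Mem[i] = Val[i];

  assert(std::memcmp(Mem, Val, sizeof(Val)) == 0); // same as an unmasked store
  return 0;
}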
// Try transforming N to an indexed store.
if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
return SDValue(N, 0);
@@ -9187,11 +9519,32 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
SDValue DAGCombiner::visitMGATHER(SDNode *N) {
MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
SDValue Mask = MGT->getMask();
+ SDValue Chain = MGT->getChain();
+ SDValue Index = MGT->getIndex();
+ SDValue Scale = MGT->getScale();
+ SDValue PassThru = MGT->getPassThru();
+ SDValue BasePtr = MGT->getBasePtr();
SDLoc DL(N);
// Zap gathers with a zero mask.
if (ISD::isBuildVectorAllZeros(Mask.getNode()))
- return CombineTo(N, MGT->getPassThru(), MGT->getChain());
+ return CombineTo(N, PassThru, MGT->getChain());
+
+ if (refineUniformBase(BasePtr, Index, DAG)) {
+ SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
+ return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
+ PassThru.getValueType(), DL, Ops,
+ MGT->getMemOperand(), MGT->getIndexType(),
+ MGT->getExtensionType());
+ }
+
+ if (refineIndexType(MGT, Index, MGT->isIndexScaled(), DAG)) {
+ SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
+ return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
+ PassThru.getValueType(), DL, Ops,
+ MGT->getMemOperand(), MGT->getIndexType(),
+ MGT->getExtensionType());
+ }
return SDValue();
}
@@ -9205,6 +9558,16 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
if (ISD::isBuildVectorAllZeros(Mask.getNode()))
return CombineTo(N, MLD->getPassThru(), MLD->getChain());
+ // If this is a masked load with an all-ones mask, we can use an unmasked load.
+ // FIXME: Can we do this for indexed, expanding, or extending loads?
+ if (ISD::isBuildVectorAllOnes(Mask.getNode()) &&
+ MLD->isUnindexed() && !MLD->isExpandingLoad() &&
+ MLD->getExtensionType() == ISD::NON_EXTLOAD) {
+ SDValue NewLd = DAG.getLoad(N->getValueType(0), SDLoc(N), MLD->getChain(),
+ MLD->getBasePtr(), MLD->getMemOperand());
+ return CombineTo(N, NewLd, NewLd.getValue(1));
+ }
+
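// Editor's illustration (not part of this patch): lane semantics of an
// unindexed, non-extending masked load. An all-zeros mask yields the passthru
// (the fold above this one); an all-ones mask makes the passthru dead, so the
// operation is a plain load (this fold).
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Mem[4] = {1, 2, 3, 4};
  const uint32_t PassThru[4] = {7, 7, 7, 7};

  auto LoadLane = [&](bool MaskBit, int i) {
    return MaskBit ? Mem[i] : PassThru[i];
  };

  for (int i = 0; i < 4; ++i) {
    assert(LoadLane(false, i) == PassThru[i]); // zero mask -> passthru lane
    assert(LoadLane(true, i) == Mem[i]);       // ones mask -> plain load lane
  }
  return 0;
}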
// Try transforming N to an indexed load.
if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
return SDValue(N, 0);
@@ -9364,6 +9727,113 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
}
}
+
+ // Match VSELECTs into add with unsigned saturation.
+ if (hasOperation(ISD::UADDSAT, VT)) {
+ // Check if one of the arms of the VSELECT is a vector with all bits set.
+ // If it's on the left side, invert the predicate to simplify the logic below.
+ SDValue Other;
+ ISD::CondCode SatCC = CC;
+ if (ISD::isBuildVectorAllOnes(N1.getNode())) {
+ Other = N2;
+ SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
+ } else if (ISD::isBuildVectorAllOnes(N2.getNode())) {
+ Other = N1;
+ }
+
+ if (Other && Other.getOpcode() == ISD::ADD) {
+ SDValue CondLHS = LHS, CondRHS = RHS;
+ SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
+
+ // Canonicalize condition operands.
+ if (SatCC == ISD::SETUGE) {
+ std::swap(CondLHS, CondRHS);
+ SatCC = ISD::SETULE;
+ }
+
+ // We can test against either of the addition operands.
+ // x <= x+y ? x+y : ~0 --> uaddsat x, y
+ // x+y >= x ? x+y : ~0 --> uaddsat x, y
+ if (SatCC == ISD::SETULE && Other == CondRHS &&
+ (OpLHS == CondLHS || OpRHS == CondLHS))
+ return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
+
+ if (isa<BuildVectorSDNode>(OpRHS) && isa<BuildVectorSDNode>(CondRHS) &&
+ CondLHS == OpLHS) {
+ // If the RHS is a constant, we have to reverse the constant
+ // canonicalization.
+ // x >= ~C ? x+C : ~0 --> uaddsat x, C
+ auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
+ return Cond->getAPIntValue() == ~Op->getAPIntValue();
+ };
+ if (SatCC == ISD::SETULE &&
+ ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
+ return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
+ }
+ }
+ }
+
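// Editor's illustration (not part of this patch): exhaustive i8 check that the
// select shape matched above, x <=u x+y ? x+y : ~0, is a saturating add.
#include <cassert>
#include <cstdint>

static uint8_t uaddsat8(uint8_t x, uint8_t y) {
  unsigned Wide = unsigned(x) + unsigned(y);
  return Wide > 0xFF ? 0xFF : static_cast<uint8_t>(Wide);
}

int main() {
  for (unsigned x = 0; x <= 0xFF; ++x)
    for (unsigned y = 0; y <= 0xFF; ++y) {
      uint8_t Sum = static_cast<uint8_t>(x + y);           // wrapping add
      uint8_t Sel = static_cast<uint8_t>(x) <= Sum ? Sum   // no unsigned wrap
                                                   : 0xFF; // wrapped: saturate
      assert(Sel == uaddsat8(static_cast<uint8_t>(x), static_cast<uint8_t>(y)));
    }
  return 0;
}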
+ // Match VSELECTs into sub with unsigned saturation.
+ if (hasOperation(ISD::USUBSAT, VT)) {
+ // Check if one of the arms of the VSELECT is a zero vector. If it's on
+ // the left side, invert the predicate to simplify the logic below.
+ SDValue Other;
+ ISD::CondCode SatCC = CC;
+ if (ISD::isBuildVectorAllZeros(N1.getNode())) {
+ Other = N2;
+ SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
+ } else if (ISD::isBuildVectorAllZeros(N2.getNode())) {
+ Other = N1;
+ }
+
+ if (Other && Other.getNumOperands() == 2 && Other.getOperand(0) == LHS) {
+ SDValue CondRHS = RHS;
+ SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
+
+ // Look for a general sub with unsigned saturation first.
+ // x >= y ? x-y : 0 --> usubsat x, y
+ // x > y ? x-y : 0 --> usubsat x, y
+ if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
+ Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
+ return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
+
+ if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS)) {
+ if (isa<BuildVectorSDNode>(CondRHS)) {
+ // If the RHS is a constant, we have to reverse the constant
+ // canonicalization.
+ // x > C-1 ? x + (-C) : 0 --> usubsat x, C
+ auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
+ return (!Op && !Cond) ||
+ (Op && Cond &&
+ Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
+ };
+ if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
+ ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
+ /*AllowUndefs*/ true)) {
+ OpRHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ OpRHS);
+ return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
+ }
+
+ // Another special case: if C was a sign bit, the sub has been
+ // canonicalized into an xor.
+ // FIXME: Would it be better to use computeKnownBits to determine
+ // whether it's safe to decanonicalize the xor?
+ // x s< 0 ? x^C : 0 --> usubsat x, C
+ if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) {
+ if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
+ OpRHSConst->getAPIntValue().isSignMask()) {
+ // Note that we have to rebuild the RHS constant here to ensure
+ // we don't rely on particular values of undef lanes.
+ OpRHS = DAG.getConstant(OpRHSConst->getAPIntValue(), DL, VT);
+ return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
+ }
+ }
+ }
+ }
+ }
+ }
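// Editor's illustration (not part of this patch): exhaustive i8 check of the
// shapes matched above -- the general form x >=u y ? x-y : 0 and the
// constant form x >u C-1 ? x + (-C) : 0 -- both compute usubsat.
#include <cassert>
#include <cstdint>

static uint8_t usubsat8(uint8_t x, uint8_t y) {
  int Wide = int(x) - int(y);
  return Wide < 0 ? 0 : static_cast<uint8_t>(Wide);
}

int main() {
  const uint8_t C = 10;
  for (unsigned xi = 0; xi <= 0xFF; ++xi) {
    uint8_t x = static_cast<uint8_t>(xi);
    for (unsigned yi = 0; yi <= 0xFF; ++yi) {
      uint8_t y = static_cast<uint8_t>(yi);
      uint8_t Sel = x >= y ? static_cast<uint8_t>(x - y) : 0;
      assert(Sel == usubsat8(x, y));
    }
    // The add operand was canonicalized to x + (-C); x >u C-1 is x >=u C.
    uint8_t SelC = x > static_cast<uint8_t>(C - 1)
                       ? static_cast<uint8_t>(x + static_cast<uint8_t>(-C))
                       : 0;
    assert(SelC == usubsat8(x, C));
  }
  return 0;
}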
}
if (SimplifySelectOps(N, N1, N2))
@@ -9722,14 +10192,14 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
SDValue BasePtr = LN0->getBasePtr();
for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
const unsigned Offset = Idx * Stride;
- const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
+ const Align Align = commonAlignment(LN0->getAlign(), Offset);
SDValue SplitLoad = DAG.getExtLoad(
ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
- BasePtr = DAG.getMemBasePlusOffset(BasePtr, Stride, DL);
+ BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(Stride), DL);
Loads.push_back(SplitLoad.getValue(0));
Chains.push_back(SplitLoad.getValue(1));
@@ -10146,7 +10616,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
- EVT N00VT = N0.getOperand(0).getValueType();
+ EVT N00VT = N00.getValueType();
// sext(setcc) -> sext_in_reg(vsetcc) for vectors.
// Only do this before legalize for now.
@@ -10240,6 +10710,29 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
}
+ // fold sext (not i1 X) -> add (zext i1 X), -1
+ // TODO: This could be extended to handle bool vectors.
+ if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
+ (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
+ TLI.isOperationLegal(ISD::ADD, VT)))) {
+ // If we can eliminate the 'not', the sext form should be better
+ if (SDValue NewXor = visitXOR(N0.getNode())) {
+ // Returning N0 is a form of in-visit replacement that may have
+ // invalidated N0.
+ if (NewXor.getNode() == N0.getNode()) {
+ // Return SDValue here as the xor should have already been replaced in
+ // this sext.
+ return SDValue();
+ } else {
+ // Return a new sext with the new xor.
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
+ }
+ }
+
+ SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
+ return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
+ }
+
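// Editor's illustration (not part of this patch): the i1 identity used above,
// sext(not b) == zext(b) + (-1), checked in both boolean states.
#include <cassert>

int main() {
  for (int bi = 0; bi <= 1; ++bi) {
    bool b = (bi != 0);
    int SextNot = !b ? -1 : 0;            // sext i1 (xor b, 1) to i32
    int ZextPlusAllOnes = int(b) + (-1);  // add (zext i1 b), -1
    assert(SextNot == ZextPlusAllOnes);
  }
  return 0;
}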
return SDValue();
}
@@ -10507,13 +11000,16 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
N0.getValueType());
}
- // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
SDLoc DL(N);
+ EVT N0VT = N0.getValueType();
+ EVT N00VT = N0.getOperand(0).getValueType();
if (SDValue SCC = SimplifySelectCC(
- DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
- DAG.getConstant(0, DL, VT),
+ DL, N0.getOperand(0), N0.getOperand(1),
+ DAG.getBoolConstant(true, DL, N0VT, N00VT),
+ DAG.getBoolConstant(false, DL, N0VT, N00VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
- return SCC;
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
}
// (zext (shl (zext x), cst)) -> (shl (zext x), cst)
@@ -10602,22 +11098,26 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
// fold (aext (load x)) -> (aext (truncate (extload x)))
// None of the supported targets knows how to perform load and any_ext
- // on vectors in one instruction. We only perform this transformation on
- // scalars.
- if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
- ISD::isUNINDEXEDLoad(N0.getNode()) &&
- TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
+ // on vectors in one instruction, so attempt to fold to zext instead.
+ if (VT.isVector()) {
+ // Try to simplify (zext (load x)).
+ if (SDValue foldedExt =
+ tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
+ ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
+ return foldedExt;
+ } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
bool DoXform = true;
- SmallVector<SDNode*, 4> SetCCs;
+ SmallVector<SDNode *, 4> SetCCs;
if (!N0.hasOneUse())
- DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
- TLI);
+ DoXform =
+ ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
- LN0->getChain(),
- LN0->getBasePtr(), N0.getValueType(),
- LN0->getMemOperand());
+ LN0->getChain(), LN0->getBasePtr(),
+ N0.getValueType(), LN0->getMemOperand());
ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
// If the load value is used only by N, replace it via CombineTo N.
bool NoReplaceTrunc = N0.hasOneUse();
@@ -10626,8 +11126,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
recursivelyDeleteUnusedNodes(LN0);
} else {
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
- N0.getValueType(), ExtLoad);
+ SDValue Trunc =
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
CombineTo(LN0, Trunc, ExtLoad.getValue(1));
}
return SDValue(N, 0); // Return N so it doesn't get rechecked!
@@ -10832,12 +11332,12 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
return SDValue();
uint64_t ShiftAmt = N01->getZExtValue();
- uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
+ uint64_t MemoryWidth = LN0->getMemoryVT().getScalarSizeInBits();
if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
else
ExtVT = EVT::getIntegerVT(*DAG.getContext(),
- VT.getSizeInBits() - ShiftAmt);
+ VT.getScalarSizeInBits() - ShiftAmt);
} else if (Opc == ISD::AND) {
// An AND with a constant mask is the same as a truncate + zero-extend.
auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
@@ -10864,12 +11364,12 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
SDValue SRL = N0;
if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
ShAmt = ConstShift->getZExtValue();
- unsigned EVTBits = ExtVT.getSizeInBits();
+ unsigned EVTBits = ExtVT.getScalarSizeInBits();
// Is the shift amount a multiple of size of VT?
if ((ShAmt & (EVTBits-1)) == 0) {
N0 = N0.getOperand(0);
// Is the load width a multiple of size of VT?
- if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
+ if ((N0.getScalarValueSizeInBits() & (EVTBits - 1)) != 0)
return SDValue();
}
@@ -10899,7 +11399,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
ShiftMask.countTrailingOnes());
// If the mask is smaller, recompute the type.
- if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
+ if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
ExtVT = MaskedVT;
}
@@ -10930,8 +11430,9 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
return SDValue();
auto AdjustBigEndianShift = [&](unsigned ShAmt) {
- unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
- unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
+ unsigned LVTStoreBits =
+ LN0->getMemoryVT().getStoreSizeInBits().getFixedSize();
+ unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedSize();
return LVTStoreBits - EVTStoreBits - ShAmt;
};
@@ -10941,13 +11442,13 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
ShAmt = AdjustBigEndianShift(ShAmt);
uint64_t PtrOff = ShAmt / 8;
- unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
+ Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff);
SDLoc DL(LN0);
// The original load itself didn't wrap, so an offset within it doesn't.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
- SDValue NewPtr =
- DAG.getMemBasePlusOffset(LN0->getBasePtr(), PtrOff, DL, Flags);
+ SDValue NewPtr = DAG.getMemBasePlusOffset(LN0->getBasePtr(),
+ TypeSize::Fixed(PtrOff), DL, Flags);
AddToWorklist(NewPtr.getNode());
SDValue Load;
@@ -10969,13 +11470,13 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
SDValue Result = Load;
if (ShLeftAmt != 0) {
EVT ShImmTy = getShiftAmountTy(Result.getValueType());
- if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
+ if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
ShImmTy = VT;
// If the shift amount is as large as the result size (but, presumably,
// no larger than the source) then the useful bits of the result are
// zero; we can't simply return the shortened shift, because the result
// of that operation is undefined.
- if (ShLeftAmt >= VT.getSizeInBits())
+ if (ShLeftAmt >= VT.getScalarSizeInBits())
Result = DAG.getConstant(0, DL, VT);
else
Result = DAG.getNode(ISD::SHL, DL, VT,
@@ -11125,6 +11626,41 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
+ // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
+ // Ignore it if the masked load is already sign-extended.
+ if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
+ if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
+ Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
+ SDValue ExtMaskedLoad = DAG.getMaskedLoad(
+ VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
+ Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
+ Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
+ CombineTo(N, ExtMaskedLoad);
+ CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
+ if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
+ if (SDValue(GN0, 0).hasOneUse() &&
+ ExtVT == GN0->getMemoryVT() &&
+ TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
+ SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
+ GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
+
+ SDValue ExtLoad = DAG.getMaskedGather(
+ DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
+ GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
+
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ AddToWorklist(ExtLoad.getNode());
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
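// Editor's illustration (not part of this patch): sign_extend_inreg over the
// memory type is exactly what a sign-extending load produces, which is why the
// extension folds into the (masked) load above. Scalar i8-in-i32 sketch,
// assuming two's-complement conversions (C++20 semantics).
#include <cassert>
#include <cstdint>

int main() {
  const unsigned char Byte = 0xF0;  // -16 as i8

  int32_t ZExtLoad = Byte;          // zero/any-extending load of the byte
  int32_t SExtInReg =
      static_cast<int32_t>(static_cast<uint32_t>(ZExtLoad) << 24) >> 24;
  int32_t SExtLoad = static_cast<int8_t>(Byte);  // sign-extending load
  assert(SExtInReg == SExtLoad && SExtLoad == -16);
  return 0;
}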
// Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
@@ -11225,10 +11761,11 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
EVT ExTy = N0.getValueType();
EVT TrTy = N->getValueType(0);
- unsigned NumElem = VecTy.getVectorNumElements();
+ auto EltCnt = VecTy.getVectorElementCount();
unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
+ auto NewEltCnt = EltCnt * SizeRatio;
- EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
SDValue EltNo = N0->getOperand(1);
@@ -11342,8 +11879,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// after truncation.
if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- if (LN0->isSimple() &&
- LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
+ if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) {
SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
VT, LN0->getChain(), LN0->getBasePtr(),
LN0->getMemoryVT(),
@@ -11416,8 +11952,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
}
// Simplify the operands using demanded-bits information.
- if (!VT.isVector() &&
- SimplifyDemandedBits(SDValue(N, 0)))
+ if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
@@ -11644,7 +12179,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
*LN0->getMemOperand())) {
SDValue Load =
DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
- LN0->getPointerInfo(), LN0->getAlignment(),
+ LN0->getPointerInfo(), LN0->getAlign(),
LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
return Load;
@@ -11991,7 +12526,6 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (!HasFMAD && !HasFMA)
return SDValue();
- SDNodeFlags Flags = N->getFlags();
bool CanFuse = Options.UnsafeFPMath || isContractable(N);
bool CanReassociate =
Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
@@ -12024,15 +12558,15 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// fold (fadd (fmul x, y), z) -> (fma x, y, z)
if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- N0.getOperand(0), N0.getOperand(1), N1, Flags);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
+ N0.getOperand(1), N1);
}
// fold (fadd x, (fmul y, z)) -> (fma y, z, x)
// Note: Commutes FADD operands.
if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- N1.getOperand(0), N1.getOperand(1), N0, Flags);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
+ N1.getOperand(1), N0);
}
// fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
@@ -12055,8 +12589,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue B = FMA.getOperand(1);
SDValue C = FMA.getOperand(2).getOperand(0);
SDValue D = FMA.getOperand(2).getOperand(1);
- SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E, Flags);
- return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE, Flags);
+ SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE);
}
// Look through FP_EXTEND nodes to do more combining.
@@ -12068,10 +12602,9 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(1)), N1, Flags);
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
+ N1);
}
}
@@ -12083,10 +12616,9 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N10.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N10.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N10.getOperand(1)), N0, Flags);
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)),
+ N0);
}
}
@@ -12094,14 +12626,13 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (Aggressive) {
// fold (fadd (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y, (fma (fpext u), (fpext v), z))
- auto FoldFAddFMAFPExtFMul = [&] (
- SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
- SDNodeFlags Flags) {
+ auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
+ SDValue Z) {
return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
- Z, Flags), Flags);
+ Z));
};
if (N0.getOpcode() == PreferredFusedOpcode) {
SDValue N02 = N0.getOperand(2);
@@ -12112,7 +12643,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
N020.getValueType())) {
return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
N020.getOperand(0), N020.getOperand(1),
- N1, Flags);
+ N1);
}
}
}
@@ -12122,16 +12653,14 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// FIXME: This turns two single-precision and one double-precision
// operation into two double-precision operations, which might not be
// interesting for all targets, especially GPUs.
- auto FoldFAddFPExtFMAFMul = [&] (
- SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
- SDNodeFlags Flags) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
- Z, Flags), Flags);
+ auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
+ SDValue Z) {
+ return DAG.getNode(
+ PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
};
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
@@ -12142,7 +12671,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
N00.getValueType())) {
return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
N002.getOperand(0), N002.getOperand(1),
- N1, Flags);
+ N1);
}
}
}
@@ -12158,7 +12687,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
N120.getValueType())) {
return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
N120.getOperand(0), N120.getOperand(1),
- N0, Flags);
+ N0);
}
}
}
@@ -12177,7 +12706,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
N10.getValueType())) {
return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
N102.getOperand(0), N102.getOperand(1),
- N0, Flags);
+ N0);
}
}
}
@@ -12235,8 +12764,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
- XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z),
- Flags);
+ XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z));
}
return SDValue();
};
@@ -12247,7 +12775,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
- YZ.getOperand(1), X, Flags);
+ YZ.getOperand(1), X);
}
return SDValue();
};
@@ -12278,7 +12806,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N01 = N0.getOperand(0).getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
- DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
+ DAG.getNode(ISD::FNEG, SL, VT, N1));
}
// Look through FP_EXTEND nodes to do more combining.
@@ -12291,11 +12819,9 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(1)),
- DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT, N1));
}
}
@@ -12307,13 +12833,11 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (isContractableFMUL(N10) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N10.getValueType())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N10.getOperand(0))),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N10.getOperand(1)),
- N0, Flags);
+ return DAG.getNode(
+ PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
}
}
@@ -12330,13 +12854,12 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (isContractableFMUL(N000) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
- return DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N000.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N000.getOperand(1)),
- N1, Flags));
+ return DAG.getNode(
+ ISD::FNEG, SL, VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
+ N1));
}
}
}
@@ -12354,13 +12877,12 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (isContractableFMUL(N000) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N000.getValueType())) {
- return DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N000.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N000.getOperand(1)),
- N1, Flags));
+ return DAG.getNode(
+ ISD::FNEG, SL, VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
+ N1));
}
}
}
@@ -12372,13 +12894,12 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
N0.getOperand(2)->hasOneUse()) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- N0.getOperand(0), N0.getOperand(1),
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
+ N0.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(2).getOperand(0),
N0.getOperand(2).getOperand(1),
- DAG.getNode(ISD::FNEG, SL, VT,
- N1), Flags), Flags);
+ DAG.getNode(ISD::FNEG, SL, VT, N1)));
}
// fold (fsub x, (fma y, z, (fmul u, v)))
@@ -12388,13 +12909,11 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N1->hasOneUse() && NoSignedZero) {
SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- N1.getOperand(0)),
- N1.getOperand(1),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT, N20),
- N21, N0, Flags), Flags);
+ return DAG.getNode(
+ PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
}
@@ -12408,15 +12927,13 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (isContractableFMUL(N020) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N020.getValueType())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N020.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N020.getOperand(1)),
- DAG.getNode(ISD::FNEG, SL, VT,
- N1), Flags), Flags);
+ return DAG.getNode(
+ PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(
+ PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT, N1)));
}
}
}
@@ -12434,18 +12951,15 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (isContractableFMUL(N002) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(1)),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N002.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N002.getOperand(1)),
- DAG.getNode(ISD::FNEG, SL, VT,
- N1), Flags), Flags);
+ return DAG.getNode(
+ PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
+ DAG.getNode(
+ PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT, N1)));
}
}
}
@@ -12461,16 +12975,13 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N120.getValueType())) {
SDValue N1200 = N120.getOperand(0);
SDValue N1201 = N120.getOperand(1);
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
- N1.getOperand(1),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL,
- VT, N1200)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N1201),
- N0, Flags), Flags);
+ return DAG.getNode(
+ PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
}
}
@@ -12491,18 +13002,15 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
CvtSrc.getValueType())) {
SDValue N1020 = N102.getOperand(0);
SDValue N1021 = N102.getOperand(1);
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N100)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL,
- VT, N1020)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N1021),
- N0, Flags), Flags);
+ return DAG.getNode(
+ PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N100)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
}
}
}
@@ -12518,7 +13026,6 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc SL(N);
- const SDNodeFlags Flags = N->getFlags();
assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
@@ -12550,56 +13057,56 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
// fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
// fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
- auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
+ auto FuseFADD = [&](SDValue X, SDValue Y) {
if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
if (C->isExactlyValue(+1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- Y, Flags);
+ Y);
if (C->isExactlyValue(-1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
+ DAG.getNode(ISD::FNEG, SL, VT, Y));
}
}
return SDValue();
};
- if (SDValue FMA = FuseFADD(N0, N1, Flags))
+ if (SDValue FMA = FuseFADD(N0, N1))
return FMA;
- if (SDValue FMA = FuseFADD(N1, N0, Flags))
+ if (SDValue FMA = FuseFADD(N1, N0))
return FMA;
// fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
// fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
// fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
// fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
- auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
+ auto FuseFSUB = [&](SDValue X, SDValue Y) {
if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
if (C0->isExactlyValue(+1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
- Y, Flags);
+ Y);
if (C0->isExactlyValue(-1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
- DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
+ DAG.getNode(ISD::FNEG, SL, VT, Y));
}
if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
if (C1->isExactlyValue(+1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
+ DAG.getNode(ISD::FNEG, SL, VT, Y));
if (C1->isExactlyValue(-1.0))
return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
- Y, Flags);
+ Y);
}
}
return SDValue();
};
- if (SDValue FMA = FuseFSUB(N0, N1, Flags))
+ if (SDValue FMA = FuseFSUB(N0, N1))
return FMA;
- if (SDValue FMA = FuseFSUB(N1, N0, Flags))
+ if (SDValue FMA = FuseFSUB(N1, N0))
return FMA;
return SDValue();
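// Editor's illustration (not part of this patch): the algebra behind FuseFADD
// and FuseFSUB above, e.g. (x + 1.0) * y == fma(x, y, y). In general this
// trades one rounding for another and is only done under the contraction
// checks the combiner performs; the values below are exact either way.
#include <cassert>
#include <cmath>

int main() {
  double x = 3.0, y = 0.5;
  assert((x + 1.0) * y == std::fma(x, y, y));     // fadd x0, +1.0
  assert((x - 1.0) * y == std::fma(x, y, -y));    // fsub x0, +1.0
  assert((1.0 - x) * y == std::fma(-x, y, y));    // fsub +1.0, x1
  assert((-1.0 - x) * y == std::fma(-x, y, -y));  // fsub -1.0, x1
  return 0;
}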
@@ -12608,12 +13115,13 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
- bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
+ bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
+ bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
- const SDNodeFlags Flags = N->getFlags();
+ SDNodeFlags Flags = N->getFlags();
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
@@ -12625,11 +13133,11 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// fold (fadd c1, c2) -> c1 + c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
+ return DAG.getNode(ISD::FADD, DL, VT, N0, N1);
// canonicalize constant to RHS
if (N0CFP && !N1CFP)
- return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
+ return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
// N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
@@ -12644,13 +13152,13 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
N1, DAG, LegalOperations, ForCodeSize))
- return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1, Flags);
+ return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
// fold (fadd (fneg A), B) -> (fsub B, A)
if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
N0, DAG, LegalOperations, ForCodeSize))
- return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0, Flags);
+ return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
auto isFMulNegTwo = [](SDValue FMul) {
if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
@@ -12662,14 +13170,14 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
if (isFMulNegTwo(N0)) {
SDValue B = N0.getOperand(0);
- SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
- return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags);
+ SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
+ return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
}
// fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
if (isFMulNegTwo(N1)) {
SDValue B = N1.getOperand(0);
- SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
- return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags);
+ SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
+ return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
}
// No FP constant should be created after legalization as Instruction
@@ -12695,9 +13203,9 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
AllowNewConst) {
// fadd (fadd x, c1), c2 -> fadd x, c1 + c2
if (N1CFP && N0.getOpcode() == ISD::FADD &&
- isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
- SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
- return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
+ DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
+ SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
+ return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
}
// We can fold chains of FADD's of the same value into multiplications.
@@ -12705,14 +13213,14 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// of rounding steps.
if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
if (N0.getOpcode() == ISD::FMUL) {
- bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
- bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
+ bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
+ bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
// (fadd (fmul x, c), x) -> (fmul x, c+1)
if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
- DAG.getConstantFP(1.0, DL, VT), Flags);
- return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
+ DAG.getConstantFP(1.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
}
// (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
@@ -12720,20 +13228,20 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
- DAG.getConstantFP(2.0, DL, VT), Flags);
- return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
+ DAG.getConstantFP(2.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
}
}
if (N1.getOpcode() == ISD::FMUL) {
- bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
- bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
+ bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
+ bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
// (fadd x, (fmul x, c)) -> (fmul x, c+1)
if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
- DAG.getConstantFP(1.0, DL, VT), Flags);
- return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
+ DAG.getConstantFP(1.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
}
// (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
@@ -12741,28 +13249,28 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N0.getOperand(0)) {
SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
- DAG.getConstantFP(2.0, DL, VT), Flags);
- return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
+ DAG.getConstantFP(2.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
}
}
if (N0.getOpcode() == ISD::FADD) {
- bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
+ bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
// (fadd (fadd x, x), x) -> (fmul x, 3.0)
if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
(N0.getOperand(0) == N1)) {
- return DAG.getNode(ISD::FMUL, DL, VT,
- N1, DAG.getConstantFP(3.0, DL, VT), Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N1,
+ DAG.getConstantFP(3.0, DL, VT));
}
}
if (N1.getOpcode() == ISD::FADD) {
- bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
+ bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
// (fadd x, (fadd x, x)) -> (fmul x, 3.0)
if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
N1.getOperand(0) == N0) {
- return DAG.getNode(ISD::FMUL, DL, VT,
- N0, DAG.getConstantFP(3.0, DL, VT), Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0,
+ DAG.getConstantFP(3.0, DL, VT));
}
}
@@ -12772,7 +13280,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {
return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
- DAG.getConstantFP(4.0, DL, VT), Flags);
+ DAG.getConstantFP(4.0, DL, VT));
}
}
} // enable-unsafe-fp-math
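// Editor's illustration (not part of this patch): the folds above reassociate
// FP adds, e.g. (x*c) + x == x*(c+1) and x+x+x == x*3.0. These are algebraic,
// not IEEE-exact in general, hence the UnsafeFPMath/reassociation guard; the
// values below happen to be exact.
#include <cassert>

int main() {
  double x = 2.0, c = 5.0;
  assert(x * c + x == x * (c + 1.0));    // (fadd (fmul x, c), x)
  assert(x + x + x == x * 3.0);          // (fadd (fadd x, x), x)
  assert((x + x) + (x + x) == x * 4.0);  // (fadd (fadd x, x), (fadd x, x))
  return 0;
}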
@@ -12785,6 +13293,33 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue N0 = N->getOperand(1);
+ SDValue N1 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ EVT ChainVT = N->getValueType(1);
+ SDLoc DL(N);
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
+
+ // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
+ if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
+ if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
+ N1, DAG, LegalOperations, ForCodeSize)) {
+ return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
+ {Chain, N0, NegN1});
+ }
+
+ // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
+ if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
+ if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
+ N0, DAG, LegalOperations, ForCodeSize)) {
+ return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
+ {Chain, N1, NegN0});
+ }
+ return SDValue();
+}
+
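// Editor's illustration (not part of this patch): IEEE-754 defines a - b as
// a + (-b) with a single rounding, so rewriting strict_fadd(A, fneg(B)) as
// strict_fsub(A, B) (and the commuted form) preserves the numeric result,
// including the signed-zero case below.
#include <cassert>
#include <cmath>

int main() {
  double a = 0.1, b = 0.3;
  assert(a + (-b) == a - b);
  assert(!std::signbit(0.0 + (-0.0)) && !std::signbit(0.0 - 0.0));
  return 0;
}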
SDValue DAGCombiner::visitFSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -12794,6 +13329,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
const SDNodeFlags Flags = N->getFlags();
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
@@ -12805,7 +13341,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// fold (fsub c1, c2) -> c1-c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
+ return DAG.getNode(ISD::FSUB, DL, VT, N0, N1);
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -12825,18 +13361,21 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
// (fsub -0.0, N1) -> -N1
- // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the
- // FSUB does not specify the sign bit of a NaN. Also note that for
- // the same reason, the inverse transform is not safe, unless fast math
- // flags are in play.
if (N0CFP && N0CFP->isZero()) {
if (N0CFP->isNegative() ||
(Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
- if (SDValue NegN1 =
- TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
- return NegN1;
- if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
- return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
+ // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
+ // flushed to zero, unless all users treat denorms as zero (DAZ).
+ // FIXME: This transform will change the sign of a NaN and the behavior
+ // of a signaling NaN. It is only valid when a NoNaN flag is present.
+ DenormalMode DenormMode = DAG.getDenormalMode(VT);
+ if (DenormMode == DenormalMode::getIEEE()) {
+ if (SDValue NegN1 =
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
+ return NegN1;
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+ return DAG.getNode(ISD::FNEG, DL, VT, N1);
+ }
}
}
@@ -12845,16 +13384,16 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
N1.getOpcode() == ISD::FADD) {
// X - (X + Y) -> -Y
if (N0 == N1->getOperand(0))
- return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
+ return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
// X - (Y + X) -> -Y
if (N0 == N1->getOperand(1))
- return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
+ return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
}
// fold (fsub A, (fneg B)) -> (fadd A, B)
if (SDValue NegN1 =
TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
- return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1, Flags);
+ return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
// FSUB -> FMA combines:
if (SDValue Fused = visitFSUBForFMACombine(N)) {
@@ -12874,6 +13413,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
const SDNodeFlags Flags = N->getFlags();
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
@@ -12887,35 +13427,28 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
// fold (fmul c1, c2) -> c1*c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, N1);
// canonicalize constant to RHS
- if (isConstantFPBuildVectorOrConstantFP(N0) &&
- !isConstantFPBuildVectorOrConstantFP(N1))
- return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
+ !DAG.isConstantFPBuildVectorOrConstantFP(N1))
+ return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
- if ((Options.NoNaNsFPMath && Options.NoSignedZerosFPMath) ||
- (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
- // fold (fmul A, 0) -> 0
- if (N1CFP && N1CFP->isZero())
- return N1;
- }
-
if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
// fmul (fmul X, C1), C2 -> fmul X, C1 * C2
- if (isConstantFPBuildVectorOrConstantFP(N1) &&
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
N0.getOpcode() == ISD::FMUL) {
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
// Avoid an infinite loop by making sure that N00 is not a constant
// (the inner multiply has not been constant folded yet).
- if (isConstantFPBuildVectorOrConstantFP(N01) &&
- !isConstantFPBuildVectorOrConstantFP(N00)) {
- SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
- return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
+ !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
+ return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
}
}
@@ -12924,14 +13457,14 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
N0.getOperand(0) == N0.getOperand(1)) {
const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
- SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
- return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
}
}
// fold (fmul X, 2.0) -> (fadd X, X)
if (N1CFP && N1CFP->isExactlyValue(+2.0))
- return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
+ return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
// fold (fmul X, -1.0) -> (fneg X)
if (N1CFP && N1CFP->isExactlyValue(-1.0))
@@ -12950,7 +13483,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (NegN0 && NegN1 &&
(CostN0 == TargetLowering::NegatibleCost::Cheaper ||
CostN1 == TargetLowering::NegatibleCost::Cheaper))
- return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
// fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
// fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
@@ -13016,10 +13549,11 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
-
// FMA nodes have flags that propagate to the created nodes.
- const SDNodeFlags Flags = N->getFlags();
- bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
+
+ bool UnsafeFPMath =
+ Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
// Constant fold FMA.
if (isa<ConstantFPSDNode>(N0) &&
@@ -13040,7 +13574,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
if (NegN0 && NegN1 &&
(CostN0 == TargetLowering::NegatibleCost::Cheaper ||
CostN1 == TargetLowering::NegatibleCost::Cheaper))
- return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags);
+ return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
if (UnsafeFPMath) {
if (N0CFP && N0CFP->isZero())
@@ -13048,51 +13582,45 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
if (N1CFP && N1CFP->isZero())
return N2;
}
- // TODO: The FMA node should have flags that propagate to these nodes.
+
if (N0CFP && N0CFP->isExactlyValue(1.0))
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
if (N1CFP && N1CFP->isExactlyValue(1.0))
return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
// Canonicalize (fma c, x, y) -> (fma x, c, y)
- if (isConstantFPBuildVectorOrConstantFP(N0) &&
- !isConstantFPBuildVectorOrConstantFP(N1))
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
+ !DAG.isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
if (UnsafeFPMath) {
// (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
- isConstantFPBuildVectorOrConstantFP(N1) &&
- isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
+ DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
+ DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
return DAG.getNode(ISD::FMUL, DL, VT, N0,
- DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
- Flags), Flags);
+ DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
}
// (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
if (N0.getOpcode() == ISD::FMUL &&
- isConstantFPBuildVectorOrConstantFP(N1) &&
- isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
- return DAG.getNode(ISD::FMA, DL, VT,
- N0.getOperand(0),
- DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
- Flags),
+ DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
+ DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
+ return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
+ DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)),
N2);
}
}
- // (fma x, 1, y) -> (fadd x, y)
// (fma x, -1, y) -> (fadd (fneg x), y)
if (N1CFP) {
if (N1CFP->isExactlyValue(1.0))
- // TODO: The FMA node should have flags that propagate to this node.
return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
if (N1CFP->isExactlyValue(-1.0) &&
(!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
AddToWorklist(RHSNeg.getNode());
- // TODO: The FMA node should have flags that propagate to this node.
return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
}
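
The fma(x, 1.0, y) and fma(x, -1.0, y) folds need no fast-math guard because multiplying by +/-1.0 is exact, so the fused operation rounds exactly once, the same as the plain add or subtract that replaces it. A scalar check:

    #include <cmath>
    #include <cstdio>

    int main() {
      double X = 0.1, Y = 0.2;
      // Multiplication by +/-1.0 cannot introduce an extra rounding step,
      // so the fused and unfused forms agree bit for bit.
      std::printf("%d %d\n", std::fma(X, 1.0, Y) == X + Y,
                  std::fma(X, -1.0, Y) == Y - X); // 1 1
    }
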
@@ -13102,25 +13630,23 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
(N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
ForCodeSize)))) {
return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
- DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
+ DAG.getNode(ISD::FNEG, DL, VT, N1), N2);
}
}
if (UnsafeFPMath) {
// (fma x, c, x) -> (fmul x, (c+1))
if (N1CFP && N0 == N2) {
- return DAG.getNode(ISD::FMUL, DL, VT, N0,
- DAG.getNode(ISD::FADD, DL, VT, N1,
- DAG.getConstantFP(1.0, DL, VT), Flags),
- Flags);
+ return DAG.getNode(
+ ISD::FMUL, DL, VT, N0,
+ DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(1.0, DL, VT)));
}
// (fma x, c, (fneg x)) -> (fmul x, (c-1))
if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
- return DAG.getNode(ISD::FMUL, DL, VT, N0,
- DAG.getNode(ISD::FADD, DL, VT, N1,
- DAG.getConstantFP(-1.0, DL, VT), Flags),
- Flags);
+ return DAG.getNode(
+ ISD::FMUL, DL, VT, N0,
+ DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(-1.0, DL, VT)));
}
}
@@ -13129,7 +13655,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
if (!TLI.isFNegFree(VT))
if (SDValue Neg = TLI.getCheaperNegatedExpression(
SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
- return DAG.getNode(ISD::FNEG, DL, VT, Neg, Flags);
+ return DAG.getNode(ISD::FNEG, DL, VT, Neg);
return SDValue();
}
@@ -13150,14 +13676,13 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
return SDValue();
// Skip if current node is a reciprocal/fneg-reciprocal.
- SDValue N0 = N->getOperand(0);
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
return SDValue();
// Exit early if the target does not want this transform or if there can't
// possibly be enough uses of the divisor to make the transform worthwhile.
- SDValue N1 = N->getOperand(1);
unsigned MinUses = TLI.combineRepeatedFPDivisors();
// For splat vectors, scale the number of uses by the splat factor. If we can
@@ -13175,6 +13700,13 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
SetVector<SDNode *> Users;
for (auto *U : N1->uses()) {
if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
+ // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
+ if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
+ U->getOperand(0) == U->getOperand(1).getOperand(0) &&
+ U->getFlags().hasAllowReassociation() &&
+ U->getFlags().hasNoSignedZeros())
+ continue;
+
// This division is eligible for optimization only if global unsafe math
// is enabled or if this division allows reciprocal formation.
if (UnsafeMath || U->getFlags().hasAllowReciprocal())
@@ -13216,6 +13748,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
SDNodeFlags Flags = N->getFlags();
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
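
The combineRepeatedFPDivisors changes above collect the FDIV users of a common divisor (now skipping X/sqrt(X) forms that a later fold turns into sqrt(X)); once enough uses are found to satisfy TLI.combineRepeatedFPDivisors(), the divisions are rewritten as one reciprocal plus multiplies. The scalar shape of the rewrite, as a sketch:

    #include <cstdio>

    // Before: three fdivs by Len. After (reassoc/arcp): one fdiv and three
    // fmuls, usually cheaper since division has much higher latency.
    static void normalize(float V[3], float Len) {
      float Recip = 1.0f / Len; // the single remaining division
      V[0] *= Recip;
      V[1] *= Recip;
      V[2] *= Recip;
    }

    int main() {
      float V[3] = {3.0f, 0.0f, 4.0f};
      normalize(V, 5.0f);
      std::printf("%g %g %g\n", V[0], V[1], V[2]); // 0.6 0 0.8
    }
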
@@ -13227,7 +13760,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
// fold (fdiv c1, c2) -> c1/c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
+ return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -13252,29 +13785,29 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
TLI.isOperationLegal(ISD::ConstantFP, VT) ||
TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
return DAG.getNode(ISD::FMUL, DL, VT, N0,
- DAG.getConstantFP(Recip, DL, VT), Flags);
+ DAG.getConstantFP(Recip, DL, VT));
}
// If this FDIV is part of a reciprocal square root, it may be folded
// into a target-specific square root estimate instruction.
if (N1.getOpcode() == ISD::FSQRT) {
if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
} else if (N1.getOpcode() == ISD::FP_EXTEND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
- if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
- Flags)) {
+ if (SDValue RV =
+ buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
}
} else if (N1.getOpcode() == ISD::FP_ROUND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
- if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
- Flags)) {
+ if (SDValue RV =
+ buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
}
} else if (N1.getOpcode() == ISD::FMUL) {
// Look through an FMUL. Even though this won't remove the FDIV directly,
@@ -13289,29 +13822,34 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
}
if (Sqrt.getNode()) {
// If the other multiply operand is known positive, pull it into the
- // sqrt. That will eliminate the division if we convert to an estimate:
- // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
- // TODO: Also fold the case where A == Z (fabs is missing).
+ // sqrt. That will eliminate the division if we convert to an estimate.
if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
- N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse() &&
- Y.getOpcode() == ISD::FABS && Y.hasOneUse()) {
- SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, Y.getOperand(0),
- Y.getOperand(0), Flags);
- SDValue AAZ =
- DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0), Flags);
- if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
- return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt, Flags);
-
- // Estimate creation failed. Clean up speculatively created nodes.
- recursivelyDeleteUnusedNodes(AAZ.getNode());
+ N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
+ SDValue A;
+ if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
+ A = Y.getOperand(0);
+ else if (Y == Sqrt.getOperand(0))
+ A = Y;
+ if (A) {
+ // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
+ // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
+ SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
+ SDValue AAZ =
+ DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
+ if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
+
+ // Estimate creation failed. Clean up speculatively created nodes.
+ recursivelyDeleteUnusedNodes(AAZ.getNode());
+ }
}
      // We found an FSQRT, so try to make this fold:
// X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
- SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y, Flags);
+ SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
AddToWorklist(Div.getNode());
- return DAG.getNode(ISD::FMUL, DL, VT, N0, Div, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
}
}
}
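
The generalized fold above also accepts Y == Sqrt.getOperand(0), i.e. X / (A * sqrt(A)). Both shapes square A so it can be moved under the radical, using fabs(A) * sqrt(Z) == sqrt(A*A*Z) (A*A is non-negative); the reassociation flags guard the regrouping. Numerically:

    #include <cmath>
    #include <cstdio>

    int main() {
      double X = 2.0, A = -3.0, Z = 5.0;
      double Before = X / (std::fabs(A) * std::sqrt(Z));
      double After = X * (1.0 / std::sqrt(A * A * Z)); // rsqrt-estimate shape
      std::printf("%.17g\n%.17g\n", Before, After);    // equal up to rounding
    }
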
@@ -13322,6 +13860,12 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
return RV;
}
+ // Fold X/Sqrt(X) -> Sqrt(X)
+ if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
+ (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
+ if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
+ return N1;
+
// (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
TargetLowering::NegatibleCost CostN0 =
TargetLowering::NegatibleCost::Expensive;
@@ -13334,7 +13878,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (NegN0 && NegN1 &&
(CostN0 == TargetLowering::NegatibleCost::Cheaper ||
CostN1 == TargetLowering::NegatibleCost::Cheaper))
- return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1, Flags);
+ return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
return SDValue();
}
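
The new X/sqrt(X) fold relies on the identity x/sqrt(x) == sqrt(x) for positive x. The nsz guard covers the one corner where the two sides disagree: at x == -0.0, sqrt returns -0.0 while the division produces NaN. For example:

    #include <cmath>
    #include <cstdio>

    int main() {
      double X = 9.0;
      std::printf("%g %g\n", X / std::sqrt(X), std::sqrt(X)); // 3 3
      // The corner that requires nsz: the unfolded form is -0.0/-0.0 == NaN.
      double Z = -0.0;
      std::printf("%g %g\n", Z / std::sqrt(Z), std::sqrt(Z)); // nan vs -0
    }
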
@@ -13346,13 +13890,14 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
SDNodeFlags Flags = N->getFlags();
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
return R;
// fold (frem c1, c2) -> fmod(c1,c2)
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
+ return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -13366,7 +13911,7 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
// Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
// sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
- if ((!Options.UnsafeFPMath && !Flags.hasApproximateFuncs()) ||
+ if (!Flags.hasApproximateFuncs() ||
(!Options.NoInfsFPMath && !Flags.hasNoInfs()))
return SDValue();
@@ -13375,6 +13920,10 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
return SDValue();
// FSQRT nodes have flags that propagate to the created nodes.
+ // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
+ // transform the fdiv, we may produce a sub-optimal estimate sequence
+ // because the reciprocal calculation may not have to filter out a
+ // 0.0 input.
return buildSqrtEstimate(N0, Flags);
}
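
buildSqrtEstimate expands sqrt(x) as x * rsqrt(x) refined by Newton-Raphson iterations, which is why visitFSQRT now insists on both afn and ninf: with x == +inf the estimate path computes 0 * inf == NaN rather than +inf. A classic software sketch of the estimate (illustrative only; real targets use a hardware estimate opcode):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>
    #include <limits>

    // Fast reciprocal-square-root sketch with one Newton-Raphson step.
    static float rsqrtEstimate(float X) {
      // Hardware estimate instructions typically return 0 for +inf;
      // model that behavior here.
      if (X == std::numeric_limits<float>::infinity())
        return 0.0f;
      float H = 0.5f * X;
      uint32_t I;
      std::memcpy(&I, &X, sizeof I);
      I = 0x5f3759dfu - (I >> 1);    // initial guess
      std::memcpy(&X, &I, sizeof X);
      return X * (1.5f - H * X * X); // one refinement step
    }

    int main() {
      float V = 2.0f;
      std::printf("sqrt(2) ~ %f\n", V * rsqrtEstimate(V));
      float Inf = std::numeric_limits<float>::infinity();
      std::printf("sqrt(inf) -> %f\n", Inf * rsqrtEstimate(Inf)); // nan
    }
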
@@ -13398,8 +13947,8 @@ static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
- bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
+ bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
+ bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
EVT VT = N->getValueType(0);
if (N0CFP && N1CFP) // Constant fold
@@ -13446,6 +13995,7 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
if (!ExponentC)
return SDValue();
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
// Try to convert x ** (1/3) into cube root.
// TODO: Handle the various flavors of long double.
@@ -13472,7 +14022,7 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
return SDValue();
- return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
+ return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
}
// Try to convert x ** (1/4) and x ** (3/4) into square roots.
@@ -13507,12 +14057,12 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
// pow(X, 0.25) --> sqrt(sqrt(X))
SDLoc DL(N);
- SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
- SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
+ SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
+ SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
if (ExponentIs025)
return SqrtSqrt;
// pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
- return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt, Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
}
return SDValue();
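
The 0.25 and 0.75 exponents are special because they reduce exactly to square-root chains: x^0.25 == sqrt(sqrt(x)), and since 0.75 == 0.5 + 0.25, x^0.75 == sqrt(x) * sqrt(sqrt(x)). For instance:

    #include <cmath>
    #include <cstdio>

    int main() {
      double X = 81.0;
      double S = std::sqrt(X);  // x^0.5 == 9
      double SS = std::sqrt(S); // x^0.25 == 3
      std::printf("%g %g\n", std::pow(X, 0.25), SS);     // 3 3
      std::printf("%g %g\n", std::pow(X, 0.75), S * SS); // 27 27
    }
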
@@ -13695,7 +14245,7 @@ SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
return DAG.getUNDEF(VT);
// fold (fp_to_sint c1fp) -> c1
- if (isConstantFPBuildVectorOrConstantFP(N0))
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
return FoldIntToFPToInt(N, DAG);
@@ -13710,7 +14260,7 @@ SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
return DAG.getUNDEF(VT);
// fold (fp_to_uint c1fp) -> c1
- if (isConstantFPBuildVectorOrConstantFP(N0))
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
return FoldIntToFPToInt(N, DAG);
@@ -13782,7 +14332,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
return SDValue();
// fold (fp_extend c1fp) -> c1fp
- if (isConstantFPBuildVectorOrConstantFP(N0))
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
// fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
@@ -13830,7 +14380,7 @@ SDValue DAGCombiner::visitFCEIL(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (fceil c1) -> fceil(c1)
- if (isConstantFPBuildVectorOrConstantFP(N0))
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
return SDValue();
@@ -13841,7 +14391,7 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (ftrunc c1) -> ftrunc(c1)
- if (isConstantFPBuildVectorOrConstantFP(N0))
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
// fold ftrunc (known rounded int x) -> x
@@ -13865,19 +14415,19 @@ SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (ffloor c1) -> ffloor(c1)
- if (isConstantFPBuildVectorOrConstantFP(N0))
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
return SDValue();
}
-// FIXME: FNEG and FABS have a lot in common; refactor.
SDValue DAGCombiner::visitFNEG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
// Constant fold FNEG.
- if (isConstantFPBuildVectorOrConstantFP(N0))
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
if (SDValue NegN0 =
@@ -13892,51 +14442,12 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
(DAG.getTarget().Options.NoSignedZerosFPMath ||
N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
- N0.getOperand(0), N->getFlags());
- }
-
- // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
- // constant pool values.
- if (!TLI.isFNegFree(VT) &&
- N0.getOpcode() == ISD::BITCAST &&
- N0.getNode()->hasOneUse()) {
- SDValue Int = N0.getOperand(0);
- EVT IntVT = Int.getValueType();
- if (IntVT.isInteger() && !IntVT.isVector()) {
- APInt SignMask;
- if (N0.getValueType().isVector()) {
- // For a vector, get a mask such as 0x80... per scalar element
- // and splat it.
- SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
- SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
- } else {
- // For a scalar, just generate 0x80...
- SignMask = APInt::getSignMask(IntVT.getSizeInBits());
- }
- SDLoc DL0(N0);
- Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
- DAG.getConstant(SignMask, DL0, IntVT));
- AddToWorklist(Int.getNode());
- return DAG.getBitcast(VT, Int);
- }
- }
-
- // (fneg (fmul c, x)) -> (fmul -c, x)
- if (N0.getOpcode() == ISD::FMUL &&
- (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
- ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
- if (CFP1) {
- APFloat CVal = CFP1->getValueAPF();
- CVal.changeSign();
- if (LegalDAG && (TLI.isFPImmLegal(CVal, VT, ForCodeSize) ||
- TLI.isOperationLegal(ISD::ConstantFP, VT)))
- return DAG.getNode(
- ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
- N0->getFlags());
- }
+ N0.getOperand(0));
}
+ if (SDValue Cast = foldSignChangeInBitcast(N))
+ return Cast;
+
return SDValue();
}
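
The two blocks deleted here (and the twin block removed from visitFABS further down) moved into the new foldSignChangeInBitcast helper; the trick itself is unchanged: on the integer side of a bitcast, fneg is an XOR of the sign bit and fabs an AND with its complement, avoiding a constant-pool FP load. In scalar form:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    static float bitop(float X, bool Neg) {
      uint32_t Bits, Sign = 0x80000000u;
      std::memcpy(&Bits, &X, sizeof Bits);
      Bits = Neg ? (Bits ^ Sign)   // fneg: flip the sign bit
                 : (Bits & ~Sign); // fabs: clear the sign bit
      std::memcpy(&X, &Bits, sizeof X);
      return X;
    }

    int main() {
      std::printf("%g %g\n", bitop(-1.5f, true), bitop(-1.5f, false)); // 1.5 1.5
    }
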
@@ -13947,6 +14458,11 @@ static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
EVT VT = N->getValueType(0);
const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
+ const SDNodeFlags Flags = N->getFlags();
+ unsigned Opc = N->getOpcode();
+ bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
+ bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
if (N0CFP && N1CFP) {
const APFloat &C0 = N0CFP->getValueAPF();
@@ -13955,10 +14471,39 @@ static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
}
// Canonicalize to constant on RHS.
- if (isConstantFPBuildVectorOrConstantFP(N0) &&
- !isConstantFPBuildVectorOrConstantFP(N1))
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
+ !DAG.isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
+ if (N1CFP) {
+ const APFloat &AF = N1CFP->getValueAPF();
+
+ // minnum(X, nan) -> X
+ // maxnum(X, nan) -> X
+ // minimum(X, nan) -> nan
+ // maximum(X, nan) -> nan
+ if (AF.isNaN())
+ return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
+
+ // In the following folds, inf can be replaced with the largest finite
+ // float, if the ninf flag is set.
+ if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
+ // minnum(X, -inf) -> -inf
+ // maxnum(X, +inf) -> +inf
+ // minimum(X, -inf) -> -inf if nnan
+ // maximum(X, +inf) -> +inf if nnan
+ if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
+ return N->getOperand(1);
+
+ // minnum(X, +inf) -> X if nnan
+ // maxnum(X, -inf) -> X if nnan
+ // minimum(X, +inf) -> X
+ // maximum(X, -inf) -> X
+ if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
+ return N->getOperand(0);
+ }
+ }
+
return SDValue();
}
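
The new constant folds lean on the semantic split between the two min/max families: FMINNUM/FMAXNUM (IEEE-754 minNum/maxNum) return the non-NaN operand when one input is NaN, while FMINIMUM/FMAXIMUM propagate the NaN, which is what the PropagatesNaN flag above encodes. The C library fmin mirrors the minnum behavior:

    #include <cmath>
    #include <cstdio>

    int main() {
      double NaN = std::nan("");
      // fmin behaves like ISD::FMINNUM: a quiet NaN operand is ignored.
      std::printf("minnum(2, nan) = %g\n", std::fmin(2.0, NaN)); // 2
      // ISD::FMINIMUM would instead propagate: minimum(2, nan) == nan.
      // And -inf absorbs for min regardless of the other operand:
      std::printf("minnum(2, -inf) = %g\n", std::fmin(2.0, -INFINITY));
    }
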
@@ -13983,7 +14528,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (fabs c1) -> fabs(c1)
- if (isConstantFPBuildVectorOrConstantFP(N0))
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
// fold (fabs (fabs x)) -> (fabs x)
@@ -13995,28 +14540,8 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
- // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads.
- if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
- SDValue Int = N0.getOperand(0);
- EVT IntVT = Int.getValueType();
- if (IntVT.isInteger() && !IntVT.isVector()) {
- APInt SignMask;
- if (N0.getValueType().isVector()) {
- // For a vector, get a mask such as 0x7f... per scalar element
- // and splat it.
- SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
- SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
- } else {
- // For a scalar, just generate 0x7f...
- SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
- }
- SDLoc DL(N0);
- Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
- DAG.getConstant(SignMask, DL, IntVT));
- AddToWorklist(Int.getNode());
- return DAG.getBitcast(N->getValueType(0), Int);
- }
- }
+ if (SDValue Cast = foldSignChangeInBitcast(N))
+ return Cast;
return SDValue();
}
@@ -14026,6 +14551,13 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
+ // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
+ // nondeterministic jumps).
+ if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
+ return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
+ N1->getOperand(0), N2);
+ }
+
// If N is a constant we could fold this into a fallthrough or unconditional
// branch. However that doesn't happen very often in normal code, because
// Instcombine/SimplifyCFG should have handled the available opportunities.
@@ -14179,63 +14711,6 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) {
return SDValue();
}
-/// Return true if 'Use' is a load or a store that uses N as its base pointer
-/// and that N may be folded in the load / store addressing mode.
-static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
- SelectionDAG &DAG,
- const TargetLowering &TLI) {
- EVT VT;
- unsigned AS;
-
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
- if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
- return false;
- VT = LD->getMemoryVT();
- AS = LD->getAddressSpace();
- } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
- if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
- return false;
- VT = ST->getMemoryVT();
- AS = ST->getAddressSpace();
- } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
- if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
- return false;
- VT = LD->getMemoryVT();
- AS = LD->getAddressSpace();
- } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
- if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
- return false;
- VT = ST->getMemoryVT();
- AS = ST->getAddressSpace();
- } else
- return false;
-
- TargetLowering::AddrMode AM;
- if (N->getOpcode() == ISD::ADD) {
- AM.HasBaseReg = true;
- ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (Offset)
- // [reg +/- imm]
- AM.BaseOffs = Offset->getSExtValue();
- else
- // [reg +/- reg]
- AM.Scale = 1;
- } else if (N->getOpcode() == ISD::SUB) {
- AM.HasBaseReg = true;
- ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (Offset)
- // [reg +/- imm]
- AM.BaseOffs = -Offset->getSExtValue();
- else
- // [reg +/- reg]
- AM.Scale = 1;
- } else
- return false;
-
- return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
- VT.getTypeForEVT(*DAG.getContext()), AS);
-}
-
static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
bool &IsLoad, bool &IsMasked, SDValue &Ptr,
const TargetLowering &TLI) {
@@ -14464,16 +14939,13 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
// Therefore, we have:
    // t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
- ConstantSDNode *CN =
- cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
- int X0, X1, Y0, Y1;
+ auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
const APInt &Offset0 = CN->getAPIntValue();
- APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
-
- X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
- Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
- X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
- Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
+ const APInt &Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
+ int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
+ int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
+ int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
+ int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
@@ -14665,8 +15137,8 @@ SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
}
-static inline int numVectorEltsOrZero(EVT T) {
- return T.isVector() ? T.getVectorNumElements() : 0;
+static inline ElementCount numVectorEltsOrZero(EVT T) {
+ return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
}
bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
@@ -14734,6 +15206,24 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
EVT STMemType = ST->getMemoryVT();
EVT STType = ST->getValue().getValueType();
+ // There are two cases to consider here:
+ // 1. The store is fixed width and the load is scalable. In this case we
+ // don't know at compile time if the store completely envelops the load
+ // so we abandon the optimisation.
+ // 2. The store is scalable and the load is fixed width. We could
+ // potentially support a limited number of cases here, but there has been
+ // no cost-benefit analysis to prove it's worth it.
+ bool LdStScalable = LDMemType.isScalableVector();
+ if (LdStScalable != STMemType.isScalableVector())
+ return SDValue();
+
+ // If we are dealing with scalable vectors on a big endian platform the
+ // calculation of offsets below becomes trickier, since we do not know at
+ // compile time the absolute size of the vector. Until we've done more
+ // analysis on big-endian platforms it seems better to bail out for now.
+ if (LdStScalable && DAG.getDataLayout().isBigEndian())
+ return SDValue();
+
BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
int64_t Offset;
@@ -14745,13 +15235,21 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
// the stored value). With Offset=n (for n > 0) the loaded value starts at the
// n:th least significant byte of the stored value.
if (DAG.getDataLayout().isBigEndian())
- Offset = ((int64_t)STMemType.getStoreSizeInBits() -
- (int64_t)LDMemType.getStoreSizeInBits()) / 8 - Offset;
+ Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() -
+ (int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) /
+ 8 -
+ Offset;
  // Check that the stored value covers all bits that are loaded.
- bool STCoversLD =
- (Offset >= 0) &&
- (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());
+ bool STCoversLD;
+
+ TypeSize LdMemSize = LDMemType.getSizeInBits();
+ TypeSize StMemSize = STMemType.getSizeInBits();
+ if (LdStScalable)
+ STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
+ else
+ STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedSize() <=
+ StMemSize.getFixedSize());
auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
if (LD->isIndexed()) {
@@ -14772,15 +15270,15 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
// Memory as copy space (potentially masked).
if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
// Simple case: Direct non-truncating forwarding
- if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
+ if (LDType.getSizeInBits() == LdMemSize)
return ReplaceLd(LD, ST->getValue(), Chain);
// Can we model the truncate and extension with an and mask?
if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
!LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
// Mask to size of LDMemType
auto Mask =
- DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
- STMemType.getSizeInBits()),
+ DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
+ StMemSize.getFixedSize()),
SDLoc(ST), STType);
auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
return ReplaceLd(LD, Val, Chain);
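
This branch forwards the stored value straight to a same-sized or zero/any-extending load without touching memory, modelling the truncate-plus-extend as an AND that keeps only the loaded bits. A scalar sketch of the little-endian, Offset == 0 case:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // A truncating i16 store of a 32-bit value ...
      uint32_t StoredVal = 0xAABBCCDDu;
      // ... followed by 'load i16, zext to i32' from the same address can be
      // rewritten as an AND with the low 16 bits set; memory never moves.
      uint32_t Forwarded = StoredVal & 0xFFFFu; // APInt::getLowBitsSet(32, 16)
      std::printf("0x%X\n", Forwarded);         // 0xCCDD
    }
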
@@ -15603,8 +16101,6 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
// Figure out the offset for the store and the alignment of the access.
unsigned StOffset;
- unsigned NewAlign = St->getAlignment();
-
if (DAG.getDataLayout().isLittleEndian())
StOffset = ByteShift;
else
@@ -15613,8 +16109,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
SDValue Ptr = St->getBasePtr();
if (StOffset) {
SDLoc DL(IVal);
- Ptr = DAG.getMemBasePlusOffset(Ptr, StOffset, DL);
- NewAlign = MinAlign(NewAlign, StOffset);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL);
}
// Truncate down to the new size.
@@ -15623,7 +16118,8 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
++OpsNarrowed;
return DAG
.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
- St->getPointerInfo().getWithOffset(StOffset), NewAlign);
+ St->getPointerInfo().getWithOffset(StOffset),
+ St->getOriginalAlign());
}
/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
@@ -15727,7 +16223,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy))
return SDValue();
- SDValue NewPtr = DAG.getMemBasePlusOffset(Ptr, PtrOff, SDLoc(LD));
+ SDValue NewPtr =
+ DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(PtrOff), SDLoc(LD));
SDValue NewLD =
DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
@@ -16035,9 +16532,9 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
// make sure we use trunc store if it's necessary to be legal.
SDValue NewStore;
if (!UseTrunc) {
- NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(),
- FirstInChain->getAlignment());
+ NewStore =
+ DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(), FirstInChain->getAlign());
} else { // Must be realized as a trunc store
EVT LegalizedStoredValTy =
TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
@@ -16049,8 +16546,7 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
NewStore = DAG.getTruncStore(
NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
- FirstInChain->getAlignment(),
- FirstInChain->getMemOperand()->getFlags());
+ FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
}
// Replace all merged stores with the new store.
@@ -16065,23 +16561,19 @@ void DAGCombiner::getStoreMergeCandidates(
StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
SDNode *&RootNode) {
// This holds the base pointer, index, and the offset in bytes from the base
- // pointer.
+ // pointer. We must have a base and an offset. Do not handle stores to undef
+ // base pointers.
BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
- EVT MemVT = St->getMemoryVT();
-
- SDValue Val = peekThroughBitcasts(St->getValue());
- // We must have a base and an offset.
- if (!BasePtr.getBase().getNode())
- return;
-
- // Do not handle stores to undef base pointers.
- if (BasePtr.getBase().isUndef())
+ if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
return;
+ SDValue Val = peekThroughBitcasts(St->getValue());
StoreSource StoreSrc = getStoreSource(Val);
assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
- BaseIndexOffset LBasePtr;
+
// Match on loadbaseptr if relevant.
+ EVT MemVT = St->getMemoryVT();
+ BaseIndexOffset LBasePtr;
EVT LoadVT;
if (StoreSrc == StoreSource::Load) {
auto *Ld = cast<LoadSDNode>(Val);
@@ -16102,7 +16594,7 @@ void DAGCombiner::getStoreMergeCandidates(
int64_t &Offset) -> bool {
// The memory operands must not be volatile/indexed/atomic.
// TODO: May be able to relax for unordered atomics (see D66309)
- if (!Other->isSimple() || Other->isIndexed())
+ if (!Other->isSimple() || Other->isIndexed())
return false;
// Don't mix temporal stores with non-temporal stores.
if (St->isNonTemporal() != Other->isNonTemporal())
@@ -16111,37 +16603,38 @@ void DAGCombiner::getStoreMergeCandidates(
// Allow merging constants of different types as integers.
bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
: Other->getMemoryVT() != MemVT;
- if (StoreSrc == StoreSource::Load) {
+ switch (StoreSrc) {
+ case StoreSource::Load: {
if (NoTypeMatch)
return false;
- // The Load's Base Ptr must also match
- if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(OtherBC)) {
- BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
- if (LoadVT != OtherLd->getMemoryVT())
- return false;
- // Loads must only have one use.
- if (!OtherLd->hasNUsesOfValue(1, 0))
- return false;
- // The memory operands must not be volatile/indexed/atomic.
- // TODO: May be able to relax for unordered atomics (see D66309)
- if (!OtherLd->isSimple() ||
- OtherLd->isIndexed())
- return false;
- // Don't mix temporal loads with non-temporal loads.
- if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
- return false;
- if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
- return false;
- } else
+ // The Load's Base Ptr must also match.
+ auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
+ if (!OtherLd)
+ return false;
+ BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
+ if (LoadVT != OtherLd->getMemoryVT())
+ return false;
+ // Loads must only have one use.
+ if (!OtherLd->hasNUsesOfValue(1, 0))
+ return false;
+ // The memory operands must not be volatile/indexed/atomic.
+ // TODO: May be able to relax for unordered atomics (see D66309)
+ if (!OtherLd->isSimple() || OtherLd->isIndexed())
return false;
+ // Don't mix temporal loads with non-temporal loads.
+ if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
+ return false;
+ if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
+ return false;
+ break;
}
- if (StoreSrc == StoreSource::Constant) {
+ case StoreSource::Constant:
if (NoTypeMatch)
return false;
if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC)))
return false;
- }
- if (StoreSrc == StoreSource::Extract) {
+ break;
+ case StoreSource::Extract:
// Do not merge truncated stores here.
if (Other->isTruncatingStore())
return false;
@@ -16150,6 +16643,9 @@ void DAGCombiner::getStoreMergeCandidates(
if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
return false;
+ break;
+ default:
+ llvm_unreachable("Unhandled store source for merging");
}
Ptr = BaseIndexOffset::match(Other, DAG);
return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
@@ -16160,11 +16656,22 @@ void DAGCombiner::getStoreMergeCandidates(
auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
SDNode *RootNode) -> bool {
auto RootCount = StoreRootCountMap.find(StoreNode);
- if (RootCount != StoreRootCountMap.end() &&
- RootCount->second.first == RootNode &&
- RootCount->second.second > StoreMergeDependenceLimit)
- return true;
- return false;
+ return RootCount != StoreRootCountMap.end() &&
+ RootCount->second.first == RootNode &&
+ RootCount->second.second > StoreMergeDependenceLimit;
+ };
+
+ auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
+ // This must be a chain use.
+ if (UseIter.getOperandNo() != 0)
+ return;
+ if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
+ BaseIndexOffset Ptr;
+ int64_t PtrDiff;
+ if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
+ !OverLimitInDependenceCheck(OtherStore, RootNode))
+ StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
+ }
};
  // We are looking for a root node which is an ancestor to all mergeable
@@ -16186,31 +16693,21 @@ void DAGCombiner::getStoreMergeCandidates(
RootNode = St->getChain().getNode();
unsigned NumNodesExplored = 0;
- if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
+ const unsigned MaxSearchNodes = 1024;
+ if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
RootNode = Ldn->getChain().getNode();
for (auto I = RootNode->use_begin(), E = RootNode->use_end();
- I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
- if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
+ I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
+ if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
- if (I2.getOperandNo() == 0)
- if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
- BaseIndexOffset Ptr;
- int64_t PtrDiff;
- if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
- !OverLimitInDependenceCheck(OtherST, RootNode))
- StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
- }
- } else
+ TryToAddCandidate(I2);
+ }
+ }
+ } else {
for (auto I = RootNode->use_begin(), E = RootNode->use_end();
- I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
- if (I.getOperandNo() == 0)
- if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
- BaseIndexOffset Ptr;
- int64_t PtrDiff;
- if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
- !OverLimitInDependenceCheck(OtherST, RootNode))
- StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
- }
+ I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
+ TryToAddCandidate(I);
+ }
}
// We need to check that merging these stores does not cause a loop in
@@ -16580,7 +17077,7 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
}
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
- unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ Align FirstStoreAlign = FirstInChain->getAlign();
LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
// Scan the memory operations on the chain and find the first
@@ -16675,7 +17172,7 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
// the NumElem refers to array/index size.
unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
NumElem = std::min(LastLegalType, NumElem);
- unsigned FirstLoadAlign = FirstLoad->getAlignment();
+ Align FirstLoadAlign = FirstLoad->getAlign();
if (NumElem < 2) {
// We know that candidate stores are in order and of correct
@@ -16687,8 +17184,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
// can here.
unsigned NumSkip = 1;
while ((NumSkip < LoadNodes.size()) &&
- (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
- (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
+ (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
+ (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
NumSkip++;
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
@@ -16761,11 +17258,10 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
FirstLoad->getChain(), FirstLoad->getBasePtr(),
FirstLoad->getPointerInfo(), JointMemOpVT,
FirstLoadAlign, LdMMOFlags);
- NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
- FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), JointMemOpVT,
- FirstInChain->getAlignment(),
- FirstInChain->getMemOperand()->getFlags());
+ NewStore = DAG.getTruncStore(
+ NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(), JointMemOpVT,
+ FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
}
// Transfer chain users from old loads to the new load.
@@ -16967,17 +17463,15 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
- unsigned Alignment = ST->getAlignment();
MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
AAMDNodes AAInfo = ST->getAAInfo();
SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
- ST->getAlignment(), MMOFlags, AAInfo);
- Ptr = DAG.getMemBasePlusOffset(Ptr, 4, DL);
- Alignment = MinAlign(Alignment, 4U);
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL);
SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
ST->getPointerInfo().getWithOffset(4),
- Alignment, MMOFlags, AAInfo);
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
St0, St1);
}
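
This function replaces a store of a 64-bit FP constant with two 32-bit integer stores of its halves (Lo/Hi swapped on big-endian); the hunk above switches the second store from a MinAlign(Alignment, 4) computation to the original alignment. The memory layout it produces, sketched for little-endian:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      double C = 1.0;
      uint64_t Bits;
      std::memcpy(&Bits, &C, sizeof Bits);
      uint32_t Lo = (uint32_t)Bits, Hi = (uint32_t)(Bits >> 32);
      unsigned char Mem[8];
      std::memcpy(Mem + 0, &Lo, 4); // store i32 Lo, ptr
      std::memcpy(Mem + 4, &Hi, 4); // store i32 Hi, ptr + 4 (little-endian)
      double Out;
      std::memcpy(&Out, Mem, 8);
      std::printf("%g\n", Out); // 1, same bytes as the original f64 store
    }
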
@@ -17038,7 +17532,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
return NewST;
// Try transforming several stores into STORE (BSWAP).
- if (SDValue Store = MatchStoreCombine(ST))
+ if (SDValue Store = mergeTruncStores(ST))
return Store;
if (ST->isUnindexed()) {
@@ -17111,11 +17605,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
!ST1->getBasePtr().isUndef() &&
// BaseIndexOffset and the code below requires knowing the size
// of a vector, so bail out if MemoryVT is scalable.
+ !ST->getMemoryVT().isScalableVector() &&
!ST1->getMemoryVT().isScalableVector()) {
const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
- unsigned STBitSize = ST->getMemoryVT().getSizeInBits();
- unsigned ChainBitSize = ST1->getMemoryVT().getSizeInBits();
+ unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits();
+ unsigned ChainBitSize = ST1->getMemoryVT().getFixedSizeInBits();
      // If this is a store whose preceding store writes to a subset of the
      // current location and no other node is chained to that store, we can
      // effectively drop that preceding store. Do not remove stores to undef as they may
@@ -17186,8 +17681,7 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
// We walk up the chains to find stores.
SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
while (!Chains.empty()) {
- SDValue Chain = Chains.back();
- Chains.pop_back();
+ SDValue Chain = Chains.pop_back_val();
if (!Chain.hasOneUse())
continue;
switch (Chain.getOpcode()) {
@@ -17207,11 +17701,16 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
// TODO: Can relax for unordered atomics (see D66309)
if (!ST->isSimple() || ST->isIndexed())
continue;
+ const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
+ // The bounds of a scalable store are not known until runtime, so this
+ // store cannot be elided.
+ if (StoreSize.isScalable())
+ continue;
const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
// If we store purely within object bounds just before its lifetime ends,
// we can remove the store.
if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
- ST->getMemoryVT().getStoreSizeInBits())) {
+ StoreSize.getFixedSize() * 8)) {
LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
dbgs() << "\nwithin LIFETIME_END of : ";
LifetimeEndBase.dump(); dbgs() << "\n");
@@ -17310,7 +17809,6 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
return SDValue();
// Start to split store.
- unsigned Alignment = ST->getAlignment();
MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
AAMDNodes AAInfo = ST->getAAInfo();
@@ -17323,13 +17821,12 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
SDValue Ptr = ST->getBasePtr();
// Lower value store.
SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
- ST->getAlignment(), MMOFlags, AAInfo);
- Ptr = DAG.getMemBasePlusOffset(Ptr, HalfValBitSize / 8, DL);
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(HalfValBitSize / 8), DL);
// Higher value store.
- SDValue St1 =
- DAG.getStore(St0, DL, Hi, Ptr,
- ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
- Alignment / 2, MMOFlags, AAInfo);
+ SDValue St1 = DAG.getStore(
+ St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
+ ST->getOriginalAlign(), MMOFlags, AAInfo);
return St1;
}
@@ -17567,6 +18064,13 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
EVT ResultVT = EVE->getValueType(0);
EVT VecEltVT = InVecVT.getVectorElementType();
+
+ // If the vector element type is not a multiple of a byte then we are unable
+ // to correctly compute an address to load only the extracted element as a
+ // scalar.
+ if (!VecEltVT.isByteSized())
+ return SDValue();
+
Align Alignment = OriginalLoad->getAlign();
Align NewAlign = DAG.getDataLayout().getABITypeAlign(
VecEltVT.getTypeForEVT(*DAG.getContext()));
@@ -18202,20 +18706,24 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
// operands will all be based off of VecIn1, even those in VecIn2.
unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
+ uint64_t VTSize = VT.getFixedSizeInBits();
+ uint64_t InVT1Size = InVT1.getFixedSizeInBits();
+ uint64_t InVT2Size = InVT2.getFixedSizeInBits();
+
// We can't generate a shuffle node with mismatched input and output types.
// Try to make the types match the type of the output.
if (InVT1 != VT || InVT2 != VT) {
- if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
+ if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
// If the output vector length is a multiple of both input lengths,
// we can concatenate them and pad the rest with undefs.
- unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
+ unsigned NumConcats = VTSize / InVT1Size;
assert(NumConcats >= 2 && "Concat needs at least two inputs!");
SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
ConcatOps[0] = VecIn1;
ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
VecIn2 = SDValue();
- } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
+ } else if (InVT1Size == VTSize * 2) {
if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
return SDValue();
@@ -18228,7 +18736,7 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
// Since we now have shorter input vectors, adjust the offset of the
// second vector's start.
Vec2Offset = NumElems;
- } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
+ } else if (InVT2Size <= InVT1Size) {
// VecIn1 is wider than the output, and we have another, possibly
// smaller input. Pad the smaller input with undefs, shuffle at the
// input vector width, and extract the output.
@@ -18253,8 +18761,7 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
// when we start sorting the vectors by type.
return SDValue();
}
- } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
- InVT1.getSizeInBits() == VT.getSizeInBits()) {
+ } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
ConcatOps[0] = VecIn2;
VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
@@ -18445,8 +18952,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
// Have we seen this input vector before?
// The vectors are expected to be tiny (usually 1 or 2 elements), so using
// a map back from SDValues to numbers isn't worth it.
- unsigned Idx = std::distance(
- VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
+ unsigned Idx = std::distance(VecIn.begin(), find(VecIn, ExtractedFromVec));
if (Idx == VecIn.size())
VecIn.push_back(ExtractedFromVec);
@@ -18904,7 +19410,7 @@ static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
// check the other type in the cast to make sure this is really legal.
EVT VT = N->getValueType(0);
EVT SrcEltVT = SrcVT.getVectorElementType();
- unsigned NumElts = SrcVT.getVectorElementCount().Min * N->getNumOperands();
+ ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
switch (CastOpcode) {
@@ -18941,9 +19447,8 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
return DAG.getUNDEF(VT);
// Optimize concat_vectors where all but the first of the vectors are undef.
- if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
- return Op.isUndef();
- })) {
+ if (all_of(drop_begin(N->ops()),
+ [](const SDValue &Op) { return Op.isUndef(); })) {
SDValue In = N->getOperand(0);
assert(In.getValueType().isVector() && "Must concat vectors");
@@ -19116,15 +19621,16 @@ static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
auto *IndexC = dyn_cast<ConstantSDNode>(Index);
if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
V.getOperand(0).getValueType() == SubVT &&
- (IndexC->getZExtValue() % SubVT.getVectorNumElements()) == 0) {
- uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorNumElements();
+ (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
+ uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
return V.getOperand(SubIdx);
}
return SDValue();
}
static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG,
+ bool LegalOperations) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue BinOp = Extract->getOperand(0);
unsigned BinOpcode = BinOp.getOpcode();
@@ -19138,7 +19644,7 @@ static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
SDValue Index = Extract->getOperand(1);
EVT SubVT = Extract->getValueType(0);
- if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT))
+ if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
return SDValue();
SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
@@ -19159,11 +19665,12 @@ static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
/// If we are extracting a subvector produced by a wide binary operator try
/// to use a narrow binary operator and/or avoid concatenation and extraction.
-static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
+static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
+ bool LegalOperations) {
// TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
// some of these bailouts with other transforms.
- if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG))
+ if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
return V;
// The extract index must be a constant, so we can map it to a concat operand.
@@ -19308,19 +19815,15 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
return SDValue();
unsigned Index = ExtIdx->getZExtValue();
- unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumElts = VT.getVectorMinNumElements();
- // If the index is a multiple of the extract element count, we can offset the
- // address by the store size multiplied by the subvector index. Otherwise if
- // the scalar type is byte sized, we can just use the index multiplied by
- // the element size in bytes as the offset.
- unsigned Offset;
- if (Index % NumElts == 0)
- Offset = (Index / NumElts) * VT.getStoreSize();
- else if (VT.getScalarType().isByteSized())
- Offset = Index * VT.getScalarType().getStoreSize();
- else
- return SDValue();
+ // The definition of EXTRACT_SUBVECTOR states that the index must be a
+ // multiple of the minimum number of elements in the result type.
+ assert(Index % NumElts == 0 && "The extract subvector index is not a "
+ "multiple of the result's element count");
+
+ // It's fine to use TypeSize here as we know the offset will not be negative.
+ TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
@@ -19329,13 +19832,21 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
// The narrow load will be offset from the base address of the old load if
// we are extracting from something besides index 0 (little-endian).
SDLoc DL(Extract);
- SDValue BaseAddr = Ld->getBasePtr();
// TODO: Use "BaseIndexOffset" to make this more effective.
- SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
+ SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
+
+ uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
MachineFunction &MF = DAG.getMachineFunction();
- MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
- VT.getStoreSize());
+ MachineMemOperand *MMO;
+ if (Offset.isScalable()) {
+ MachinePointerInfo MPI =
+ MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
+ MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
+ } else
+ MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(),
+ StoreSize);
+
SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
return NewLd;
@@ -19388,8 +19899,9 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
}
if ((DestNumElts % SrcNumElts) == 0) {
unsigned DestSrcRatio = DestNumElts / SrcNumElts;
- if ((NVT.getVectorMinNumElements() % DestSrcRatio) == 0) {
- ElementCount NewExtEC = NVT.getVectorElementCount() / DestSrcRatio;
+ if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
+ ElementCount NewExtEC =
+ NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
EVT ScalarVT = SrcVT.getScalarType();
if ((ExtIdx % DestSrcRatio) == 0) {
SDLoc DL(N);
@@ -19403,7 +19915,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
V.getOperand(0), NewIndex);
return DAG.getBitcast(NVT, NewExtract);
}
- if (NewExtEC == 1 &&
+ if (NewExtEC.isScalar() &&
TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
SDValue NewExtract =
@@ -19508,7 +20020,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
N->getOperand(1));
}
- if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
+ if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
return NarrowBOp;
if (SimplifyDemandedVectorElts(SDValue(N, 0)))
@@ -20286,52 +20798,52 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
}
- // Canonicalize shuffles according to rules:
- // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
- // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
- // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
- if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
- N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
- TLI.isTypeLegal(VT)) {
- // The incoming shuffle must be of the same type as the result of the
- // current shuffle.
- assert(N1->getOperand(0).getValueType() == VT &&
- "Shuffle types don't match");
-
- SDValue SV0 = N1->getOperand(0);
- SDValue SV1 = N1->getOperand(1);
- bool HasSameOp0 = N0 == SV0;
- bool IsSV1Undef = SV1.isUndef();
- if (HasSameOp0 || IsSV1Undef || N0 == SV1)
- // Commute the operands of this shuffle so that next rule
- // will trigger.
+ if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
+ // Canonicalize shuffles according to rules:
+ // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
+ // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
+ // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
+ if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
+ // The incoming shuffle must be of the same type as the result of the
+ // current shuffle.
+ assert(N1->getOperand(0).getValueType() == VT &&
+ "Shuffle types don't match");
+
+ SDValue SV0 = N1->getOperand(0);
+ SDValue SV1 = N1->getOperand(1);
+ bool HasSameOp0 = N0 == SV0;
+ bool IsSV1Undef = SV1.isUndef();
+ if (HasSameOp0 || IsSV1Undef || N0 == SV1)
+ // Commute the operands of this shuffle so merging below will trigger.
+ return DAG.getCommutedVectorShuffle(*SVN);
+ }
+
+ // Canonicalize splat shuffles to the RHS to improve merging below.
+ // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ cast<ShuffleVectorSDNode>(N0)->isSplat() &&
+ !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
return DAG.getCommutedVectorShuffle(*SVN);
+ }
}
- // Try to fold according to rules:
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
- // Don't try to fold shuffles with illegal type.
- // Only fold if this shuffle is the only user of the other shuffle.
- if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
- Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
- ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
-
+ // Compute the combined shuffle mask for a shuffle with SV0 as the first
+ // operand, and SV1 as the second operand.
+ // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask).
+ auto MergeInnerShuffle = [NumElts](ShuffleVectorSDNode *SVN,
+ ShuffleVectorSDNode *OtherSVN, SDValue N1,
+ SDValue &SV0, SDValue &SV1,
+ SmallVectorImpl<int> &Mask) -> bool {
// Don't try to fold splats; they're likely to simplify somehow, or they
// might be free.
- if (OtherSV->isSplat())
- return SDValue();
+ if (OtherSVN->isSplat())
+ return false;
- // The incoming shuffle must be of the same type as the result of the
- // current shuffle.
- assert(OtherSV->getOperand(0).getValueType() == VT &&
- "Shuffle types don't match");
+ SV0 = SV1 = SDValue();
+ Mask.clear();
- SDValue SV0, SV1;
- SmallVector<int, 4> Mask;
- // Compute the combined shuffle mask for a shuffle with SV0 as the first
- // operand, and SV1 as the second operand.
for (unsigned i = 0; i != NumElts; ++i) {
int Idx = SVN->getMaskElt(i);
if (Idx < 0) {
@@ -20344,15 +20856,14 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (Idx < (int)NumElts) {
// This shuffle index refers to the inner shuffle N0. Lookup the inner
// shuffle mask to identify which vector is actually referenced.
- Idx = OtherSV->getMaskElt(Idx);
+ Idx = OtherSVN->getMaskElt(Idx);
if (Idx < 0) {
// Propagate Undef.
Mask.push_back(Idx);
continue;
}
-
- CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
- : OtherSV->getOperand(1);
+ CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
+ : OtherSVN->getOperand(1);
} else {
// This shuffle index references an element within N1.
CurrentVec = N1;
@@ -20374,38 +20885,82 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
Mask.push_back(Idx);
continue;
}
+ if (!SV1.getNode() || SV1 == CurrentVec) {
+ // Ok. CurrentVec is the right hand side.
+ // Update the mask accordingly.
+ SV1 = CurrentVec;
+ Mask.push_back(Idx + NumElts);
+ continue;
+ }
- // Bail out if we cannot convert the shuffle pair into a single shuffle.
- if (SV1.getNode() && SV1 != CurrentVec)
- return SDValue();
+ // Last chance - see if the vector is another shuffle and if it
+ // uses one of the existing candidate shuffle ops.
+ if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
+ int InnerIdx = CurrentSVN->getMaskElt(Idx);
+ if (InnerIdx < 0) {
+ Mask.push_back(-1);
+ continue;
+ }
+ SDValue InnerVec = (InnerIdx < (int)NumElts)
+ ? CurrentSVN->getOperand(0)
+ : CurrentSVN->getOperand(1);
+ if (InnerVec.isUndef()) {
+ Mask.push_back(-1);
+ continue;
+ }
+ InnerIdx %= NumElts;
+ if (InnerVec == SV0) {
+ Mask.push_back(InnerIdx);
+ continue;
+ }
+ if (InnerVec == SV1) {
+ Mask.push_back(InnerIdx + NumElts);
+ continue;
+ }
+ }
- // Ok. CurrentVec is the right hand side.
- // Update the mask accordingly.
- SV1 = CurrentVec;
- Mask.push_back(Idx + NumElts);
+ // Bail out if we cannot convert the shuffle pair into a single shuffle.
+ return false;
}
+ return true;
+ };
+
+ // Try to fold according to rules:
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
+ // Don't try to fold shuffles with illegal type.
+ // Only fold if this shuffle is the only user of the other shuffle.
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
+ Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
+ ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
- // Check if all indices in Mask are Undef. In case, propagate Undef.
- bool isUndefMask = true;
- for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
- isUndefMask &= Mask[i] < 0;
+ // The incoming shuffle must be of the same type as the result of the
+ // current shuffle.
+ assert(OtherSV->getOperand(0).getValueType() == VT &&
+ "Shuffle types don't match");
- if (isUndefMask)
- return DAG.getUNDEF(VT);
+ SDValue SV0, SV1;
+ SmallVector<int, 4> Mask;
+ if (MergeInnerShuffle(SVN, OtherSV, N1, SV0, SV1, Mask)) {
+ // Check if all indices in Mask are Undef. In case, propagate Undef.
+ if (llvm::all_of(Mask, [](int M) { return M < 0; }))
+ return DAG.getUNDEF(VT);
- if (!SV0.getNode())
- SV0 = DAG.getUNDEF(VT);
- if (!SV1.getNode())
- SV1 = DAG.getUNDEF(VT);
+ if (!SV0.getNode())
+ SV0 = DAG.getUNDEF(VT);
+ if (!SV1.getNode())
+ SV1 = DAG.getUNDEF(VT);
- // Avoid introducing shuffles with illegal mask.
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
- return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG);
+ // Avoid introducing shuffles with illegal mask.
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
+ return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG);
+ }
}
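
MergeInnerShuffle's mask folding is easiest to follow on concrete indices. The sketch below reproduces just the index bookkeeping over small integer ids (0 = A, 1 = B, 2 = C standing in for SDValues), bailing out when a third distinct source would be needed; the look-through-another-shuffle rescue in the real code is omitted:

    #include <cassert>
    #include <cstdio>
    #include <vector>

    static bool mergeMasks(const std::vector<int> &M0,
                           const std::vector<int> &M1, unsigned NumElts,
                           int &SV0, int &SV1, std::vector<int> &Mask) {
      SV0 = SV1 = -1;
      for (unsigned i = 0; i != NumElts; ++i) {
        int Idx = M1[i];
        if (Idx < 0) {              // undef lane: propagate
          Mask.push_back(-1);
          continue;
        }
        int Src, SrcIdx;
        if (Idx < (int)NumElts) {   // lane comes through the inner shuffle
          SrcIdx = M0[Idx];
          if (SrcIdx < 0) {
            Mask.push_back(-1);
            continue;
          }
          Src = SrcIdx < (int)NumElts ? 0 : 1;  // A or B
          SrcIdx %= (int)NumElts;
        } else {                    // lane reads C directly
          Src = 2;
          SrcIdx = Idx - (int)NumElts;
        }
        if (SV0 < 0 || SV0 == Src) {
          SV0 = Src;
          Mask.push_back(SrcIdx);
        } else if (SV1 < 0 || SV1 == Src) {
          SV1 = Src;
          Mask.push_back(SrcIdx + (int)NumElts);
        } else {
          return false;  // three live sources: cannot form one shuffle
        }
      }
      return true;
    }

    int main() {
      // shuffle(shuffle(A, B, {0,5,2,7}), C, {0,1,2,3}) never reads C,
      // so it folds to shuffle(A, B, {0,5,2,7}).
      int SV0, SV1;
      std::vector<int> Mask;
      bool OK = mergeMasks({0, 5, 2, 7}, {0, 1, 2, 3}, 4, SV0, SV1, Mask);
      assert(OK && SV0 == 0 && SV1 == 1);
      for (int M : Mask)
        printf("%d ", M);  // 0 5 2 7
      printf("\n");
      return 0;
    }
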
if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
@@ -20490,8 +21045,8 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N1.getOperand(0).getOperand(1) == N2 &&
- N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
- VT.getVectorNumElements() &&
+ N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
+ VT.getVectorElementCount() &&
N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
VT.getSizeInBits()) {
return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
@@ -20508,7 +21063,7 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
EVT CN1VT = CN1.getValueType();
if (CN0VT.isVector() && CN1VT.isVector() &&
CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
- CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
+ CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
CN0.getValueType(), CN0, CN1, N2);
return DAG.getBitcast(VT, NewINSERT);
@@ -20547,7 +21102,7 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
SDLoc DL(N);
SDValue NewIdx;
LLVMContext &Ctx = *DAG.getContext();
- unsigned NumElts = VT.getVectorNumElements();
+ ElementCount NumElts = VT.getVectorElementCount();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
@@ -20555,8 +21110,9 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
} else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
- if ((NumElts % Scale) == 0 && (InsIdx % Scale) == 0) {
- NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts / Scale);
+ if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
+ NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
+ NumElts.divideCoefficientBy(Scale));
NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
}
}
@@ -20588,8 +21144,10 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
// If the input vector is a concatenation, and the insert replaces
// one of the pieces, we can optimize into a single concat_vectors.
if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
- N0.getOperand(0).getValueType() == N1.getValueType()) {
- unsigned Factor = N1.getValueType().getVectorNumElements();
+ N0.getOperand(0).getValueType() == N1.getValueType() &&
+ N0.getOperand(0).getValueType().isScalableVector() ==
+ N1.getValueType().isScalableVector()) {
+ unsigned Factor = N1.getValueType().getVectorMinNumElements();
SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
Ops[InsIdx / Factor] = N1;
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
@@ -20633,7 +21191,7 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
unsigned Opcode = N->getOpcode();
// VECREDUCE over 1-element vector is just an extract.
- if (VT.getVectorNumElements() == 1) {
+ if (VT.getVectorElementCount().isScalar()) {
SDLoc dl(N);
SDValue Res =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
@@ -20872,7 +21430,8 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
SDValue Z = LHS.getOperand(2);
EVT NarrowVT = X.getValueType();
if (NarrowVT == Y.getValueType() &&
- TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
+ TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
+ LegalOperations)) {
// (binop undef, undef) may not return undef, so compute that result.
SDLoc DL(N);
SDValue VecC =
@@ -20885,11 +21444,10 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
// Make sure all but the first op are undef or constant.
auto ConcatWithConstantOrUndef = [](SDValue Concat) {
return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
- std::all_of(std::next(Concat->op_begin()), Concat->op_end(),
- [](const SDValue &Op) {
- return Op.isUndef() ||
- ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
- });
+ all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
+ return Op.isUndef() ||
+ ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
+ });
};
// The following pattern is likely to emerge with vector reduction ops. Moving
@@ -21111,7 +21669,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
// It is safe to replace the two loads if they have different alignments,
// but the new load must be the minimum (most restrictive) alignment of the
// inputs.
- unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
+ Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
if (!RLD->isInvariant())
MMOFlags &= ~MachineMemOperand::MOInvariant;
@@ -21217,6 +21775,46 @@ SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
}
+// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
+SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ bool IsFabs = N->getOpcode() == ISD::FABS;
+ bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
+
+ if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
+ return SDValue();
+
+ SDValue Int = N0.getOperand(0);
+ EVT IntVT = Int.getValueType();
+
+ // The operand to cast should be integer.
+ if (!IntVT.isInteger() || IntVT.isVector())
+ return SDValue();
+
+ // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
+ // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
+ APInt SignMask;
+ if (N0.getValueType().isVector()) {
+ // For vector, create a sign mask (0x80...) or its inverse (for fabs,
+ // 0x7f...) per element and splat it.
+ SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
+ if (IsFabs)
+ SignMask = ~SignMask;
+ SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
+ } else {
+ // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
+ SignMask = APInt::getSignMask(IntVT.getSizeInBits());
+ if (IsFabs)
+ SignMask = ~SignMask;
+ }
+ SDLoc DL(N0);
+ Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
+ DAG.getConstant(SignMask, DL, IntVT));
+ AddToWorklist(Int.getNode());
+ return DAG.getBitcast(VT, Int);
+}
+
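
The masks this fold builds are plain bit tricks on the float's integer image. A self-contained check of the scalar case — the hex constants are the IEEE-754 single-precision encodings, chosen for the example rather than taken from the patch:

    #include <cstdint>
    #include <cstdio>

    // fneg = XOR with the sign mask; fabs = AND with its complement.
    // 0x40490FDB is the bit pattern of 3.14159274f.
    int main() {
      const uint32_t SignMask = 0x80000000u;  // APInt::getSignMask(32)
      uint32_t Bits = 0x40490FDBu;
      uint32_t Neg = Bits ^ SignMask;         // -> 0xC0490FDB, negated
      uint32_t Abs = Neg & ~SignMask;         // -> 0x40490FDB again
      printf("%08X %08X %08X\n", Bits, Neg, Abs);
      return 0;
    }
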
/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
/// in it. This may be a win when the constant is not otherwise available
@@ -21498,9 +22096,8 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
EVT VT = V.getValueType();
- unsigned EltBits = VT.getScalarSizeInBits();
SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
- SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
+ SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
return LogBase2;
}
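
The identity behind BuildLogBase2 — for a power of two V, log2(V) equals (bit width - 1) - ctlz(V) — is easy to sanity-check standalone, with the GCC/Clang __builtin_clz as the CTLZ stand-in:

    #include <cstdint>
    #include <cstdio>

    int main() {
      for (uint32_t V : {1u, 2u, 16u, 1u << 20}) {
        unsigned Log2 = (32 - 1) - __builtin_clz(V);  // (EltBits-1) - ctlz
        printf("log2(%u) = %u\n", V, Log2);           // 0, 1, 4, 20
      }
      return 0;
    }
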
@@ -21678,37 +22275,21 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
Reciprocal)) {
AddToWorklist(Est.getNode());
- if (Iterations) {
+ if (Iterations)
Est = UseOneConstNR
? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
: buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
-
- if (!Reciprocal) {
- // The estimate is now completely wrong if the input was exactly 0.0 or
- // possibly a denormal. Force the answer to 0.0 for those cases.
- SDLoc DL(Op);
- EVT CCVT = getSetCCResultType(VT);
- ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
- DenormalMode DenormMode = DAG.getDenormalMode(VT);
- if (DenormMode.Input == DenormalMode::IEEE) {
- // This is specifically a check for the handling of denormal inputs,
- // not the result.
-
- // fabs(X) < SmallestNormal ? 0.0 : Est
- const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
- APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
- SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
- SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
- SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
- SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
- Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
- } else {
- // X == 0.0 ? 0.0 : Est
- SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
- SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
- Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
- }
- }
+ if (!Reciprocal) {
+ SDLoc DL(Op);
+ // Try the target specific test first.
+ SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
+
+ // The estimate is now completely wrong if the input was exactly 0.0 or
+ // possibly a denormal. Force the answer to 0.0 or value provided by
+ // target for those cases.
+ Est = DAG.getNode(
+ Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
+ Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
}
return Est;
}
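
The logic this hunk moves behind TLI.getSqrtInputTest is, in the IEEE-denormal-input case, a compare against the smallest normal number. A scalar sketch of the select, with FLT_MIN standing in for APFloat::getSmallestNormalized:

    #include <cfloat>
    #include <cmath>
    #include <cstdio>

    // fabs(X) < SmallestNormal ? 0.0 : Est -- the refined estimate is
    // meaningless for zero/denormal inputs, so force the answer instead.
    static float sqrtEstimateFixup(float X, float Est) {
      return std::fabs(X) < FLT_MIN ? 0.0f : Est;
    }

    int main() {
      printf("%f\n", sqrtEstimateFixup(0.0f, NAN));   // 0.000000
      printf("%f\n", sqrtEstimateFixup(4.0f, 2.0f));  // 2.000000
      return 0;
    }
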
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index f5948d2a20dc..62f7f3d98ba6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -113,11 +113,6 @@ using namespace PatternMatch;
#define DEBUG_TYPE "isel"
-// FIXME: Remove this after the feature has proven reliable.
-static cl::opt<bool> SinkLocalValues("fast-isel-sink-local-values",
- cl::init(true), cl::Hidden,
- cl::desc("Sink local values in FastISel"));
-
STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by "
"target-independent selector");
STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by "
@@ -139,7 +134,6 @@ void FastISel::startNewBlock() {
LastLocalValue = EmitStartPt;
}
-/// Flush the local CSE map and sink anything we can.
void FastISel::finishBasicBlock() { flushLocalValueMap(); }
bool FastISel::lowerArguments() {
@@ -164,48 +158,77 @@ bool FastISel::lowerArguments() {
/// Return the defined register if this instruction defines exactly one
/// virtual register and uses no other virtual registers. Otherwise return 0.
-static Register findSinkableLocalRegDef(MachineInstr &MI) {
+static Register findLocalRegDef(MachineInstr &MI) {
Register RegDef;
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;
if (MO.isDef()) {
if (RegDef)
- return 0;
+ return Register();
RegDef = MO.getReg();
} else if (MO.getReg().isVirtual()) {
- // This is another use of a vreg. Don't try to sink it.
+ // This is another use of a vreg. Don't delete it.
return Register();
}
}
return RegDef;
}
+static bool isRegUsedByPhiNodes(Register DefReg,
+ FunctionLoweringInfo &FuncInfo) {
+ for (auto &P : FuncInfo.PHINodesToUpdate)
+ if (P.second == DefReg)
+ return true;
+ return false;
+}
+
void FastISel::flushLocalValueMap() {
- // Try to sink local values down to their first use so that we can give them a
- // better debug location. This has the side effect of shrinking local value
- // live ranges, which helps out fast regalloc.
- if (SinkLocalValues && LastLocalValue != EmitStartPt) {
- // Sink local value materialization instructions between EmitStartPt and
- // LastLocalValue. Visit them bottom-up, starting from LastLocalValue, to
- // avoid inserting into the range that we're iterating over.
+ // If FastISel bails out, it could leave local value instructions behind
+ // that aren't used for anything. Detect and erase those.
+ if (LastLocalValue != EmitStartPt) {
+ // Save the first instruction after local values, for later.
+ MachineBasicBlock::iterator FirstNonValue(LastLocalValue);
+ ++FirstNonValue;
+
MachineBasicBlock::reverse_iterator RE =
EmitStartPt ? MachineBasicBlock::reverse_iterator(EmitStartPt)
: FuncInfo.MBB->rend();
MachineBasicBlock::reverse_iterator RI(LastLocalValue);
-
- InstOrderMap OrderMap;
for (; RI != RE;) {
MachineInstr &LocalMI = *RI;
+ // Increment before erasing what it points to.
++RI;
- bool Store = true;
- if (!LocalMI.isSafeToMove(nullptr, Store))
+ Register DefReg = findLocalRegDef(LocalMI);
+ if (!DefReg)
continue;
- Register DefReg = findSinkableLocalRegDef(LocalMI);
- if (DefReg == 0)
+ if (FuncInfo.RegsWithFixups.count(DefReg))
continue;
+ bool UsedByPHI = isRegUsedByPhiNodes(DefReg, FuncInfo);
+ if (!UsedByPHI && MRI.use_nodbg_empty(DefReg)) {
+ if (EmitStartPt == &LocalMI)
+ EmitStartPt = EmitStartPt->getPrevNode();
+ LLVM_DEBUG(dbgs() << "removing dead local value materialization "
+ << LocalMI);
+ LocalMI.eraseFromParent();
+ }
+ }
- sinkLocalValueMaterialization(LocalMI, DefReg, OrderMap);
+ if (FirstNonValue != FuncInfo.MBB->end()) {
+ // See if there are any local value instructions left. If so, we want to
+ // make sure the first one has a debug location; if it doesn't, use the
+ // first non-value instruction's debug location.
+
+ // If EmitStartPt is non-null, this block had copies at the top before
+ // FastISel started doing anything; it points to the last one, so the
+ // first local value instruction is the one after EmitStartPt.
+ // If EmitStartPt is null, the first local value instruction is at the
+ // top of the block.
+ MachineBasicBlock::iterator FirstLocalValue =
+ EmitStartPt ? ++MachineBasicBlock::iterator(EmitStartPt)
+ : FuncInfo.MBB->begin();
+ if (FirstLocalValue != FirstNonValue && !FirstLocalValue->getDebugLoc())
+ FirstLocalValue->setDebugLoc(FirstNonValue->getDebugLoc());
}
}
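
The sweep is simple once stripped of machine-IR details: walk the materialization range, advance the iterator before erasing, and delete anything whose result is unused and feeds no PHI. A toy version over plain structs (names illustrative):

    #include <cstdio>
    #include <list>
    #include <string>

    struct Inst { std::string Text; unsigned NumUses; bool FeedsPhi; };

    int main() {
      std::list<Inst> LocalValues = {{"li r0, 1", 0, false},   // dead
                                     {"li r1, 2", 1, false},   // used
                                     {"li r2, 3", 0, true}};   // kept: PHI
      for (auto It = LocalValues.begin(); It != LocalValues.end();) {
        auto Cur = It++;                       // increment before erasing
        if (Cur->NumUses == 0 && !Cur->FeedsPhi)
          LocalValues.erase(Cur);
      }
      for (const Inst &I : LocalValues)
        printf("%s\n", I.Text.c_str());        // li r1, 2 / li r2, 3
      return 0;
    }
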
@@ -213,132 +236,6 @@ void FastISel::flushLocalValueMap() {
LastLocalValue = EmitStartPt;
recomputeInsertPt();
SavedInsertPt = FuncInfo.InsertPt;
- LastFlushPoint = FuncInfo.InsertPt;
-}
-
-static bool isRegUsedByPhiNodes(Register DefReg,
- FunctionLoweringInfo &FuncInfo) {
- for (auto &P : FuncInfo.PHINodesToUpdate)
- if (P.second == DefReg)
- return true;
- return false;
-}
-
-static bool isTerminatingEHLabel(MachineBasicBlock *MBB, MachineInstr &MI) {
- // Ignore non-EH labels.
- if (!MI.isEHLabel())
- return false;
-
- // Any EH label outside a landing pad must be for an invoke. Consider it a
- // terminator.
- if (!MBB->isEHPad())
- return true;
-
- // If this is a landingpad, the first non-phi instruction will be an EH_LABEL.
- // Don't consider that label to be a terminator.
- return MI.getIterator() != MBB->getFirstNonPHI();
-}
-
-/// Build a map of instruction orders. Return the first terminator and its
-/// order. Consider EH_LABEL instructions to be terminators as well, since local
-/// values for phis after invokes must be materialized before the call.
-void FastISel::InstOrderMap::initialize(
- MachineBasicBlock *MBB, MachineBasicBlock::iterator LastFlushPoint) {
- unsigned Order = 0;
- for (MachineInstr &I : *MBB) {
- if (!FirstTerminator &&
- (I.isTerminator() || isTerminatingEHLabel(MBB, I))) {
- FirstTerminator = &I;
- FirstTerminatorOrder = Order;
- }
- Orders[&I] = Order++;
-
- // We don't need to order instructions past the last flush point.
- if (I.getIterator() == LastFlushPoint)
- break;
- }
-}
-
-void FastISel::sinkLocalValueMaterialization(MachineInstr &LocalMI,
- Register DefReg,
- InstOrderMap &OrderMap) {
- // If this register is used by a register fixup, MRI will not contain all
- // the uses until after register fixups, so don't attempt to sink or DCE
- // this instruction. Register fixups typically come from no-op cast
- // instructions, which replace the cast instruction vreg with the local
- // value vreg.
- if (FuncInfo.RegsWithFixups.count(DefReg))
- return;
-
- // We can DCE this instruction if there are no uses and it wasn't a
- // materialized for a successor PHI node.
- bool UsedByPHI = isRegUsedByPhiNodes(DefReg, FuncInfo);
- if (!UsedByPHI && MRI.use_nodbg_empty(DefReg)) {
- if (EmitStartPt == &LocalMI)
- EmitStartPt = EmitStartPt->getPrevNode();
- LLVM_DEBUG(dbgs() << "removing dead local value materialization "
- << LocalMI);
- OrderMap.Orders.erase(&LocalMI);
- LocalMI.eraseFromParent();
- return;
- }
-
- // Number the instructions if we haven't yet so we can efficiently find the
- // earliest use.
- if (OrderMap.Orders.empty())
- OrderMap.initialize(FuncInfo.MBB, LastFlushPoint);
-
- // Find the first user in the BB.
- MachineInstr *FirstUser = nullptr;
- unsigned FirstOrder = std::numeric_limits<unsigned>::max();
- for (MachineInstr &UseInst : MRI.use_nodbg_instructions(DefReg)) {
- auto I = OrderMap.Orders.find(&UseInst);
- assert(I != OrderMap.Orders.end() &&
- "local value used by instruction outside local region");
- unsigned UseOrder = I->second;
- if (UseOrder < FirstOrder) {
- FirstOrder = UseOrder;
- FirstUser = &UseInst;
- }
- }
-
- // The insertion point will be the first terminator or the first user,
- // whichever came first. If there was no terminator, this must be a
- // fallthrough block and the insertion point is the end of the block.
- MachineBasicBlock::instr_iterator SinkPos;
- if (UsedByPHI && OrderMap.FirstTerminatorOrder < FirstOrder) {
- FirstOrder = OrderMap.FirstTerminatorOrder;
- SinkPos = OrderMap.FirstTerminator->getIterator();
- } else if (FirstUser) {
- SinkPos = FirstUser->getIterator();
- } else {
- assert(UsedByPHI && "must be users if not used by a phi");
- SinkPos = FuncInfo.MBB->instr_end();
- }
-
- // Collect all DBG_VALUEs before the new insertion position so that we can
- // sink them.
- SmallVector<MachineInstr *, 1> DbgValues;
- for (MachineInstr &DbgVal : MRI.use_instructions(DefReg)) {
- if (!DbgVal.isDebugValue())
- continue;
- unsigned UseOrder = OrderMap.Orders[&DbgVal];
- if (UseOrder < FirstOrder)
- DbgValues.push_back(&DbgVal);
- }
-
- // Sink LocalMI before SinkPos and assign it the same DebugLoc.
- LLVM_DEBUG(dbgs() << "sinking local value to first use " << LocalMI);
- FuncInfo.MBB->remove(&LocalMI);
- FuncInfo.MBB->insert(SinkPos, &LocalMI);
- if (SinkPos != FuncInfo.MBB->end())
- LocalMI.setDebugLoc(SinkPos->getDebugLoc());
-
- // Sink any debug values that we've collected.
- for (MachineInstr *DI : DbgValues) {
- FuncInfo.MBB->remove(DI);
- FuncInfo.MBB->insert(SinkPos, DI);
- }
}
bool FastISel::hasTrivialKill(const Value *V) {
@@ -446,7 +343,7 @@ Register FastISel::materializeConstant(const Value *V, MVT VT) {
getRegForValue(ConstantInt::get(V->getContext(), SIntVal));
if (IntegerReg)
Reg = fastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg,
- /*Kill=*/false);
+ /*Op0IsKill=*/false);
}
}
} else if (const auto *Op = dyn_cast<Operator>(V)) {
@@ -560,8 +457,6 @@ void FastISel::removeDeadCode(MachineBasicBlock::iterator I,
assert(I.isValid() && E.isValid() && std::distance(I, E) > 0 &&
"Invalid iterator!");
while (I != E) {
- if (LastFlushPoint == I)
- LastFlushPoint = E;
if (SavedInsertPt == I)
SavedInsertPt = E;
if (EmitStartPt == I)
@@ -578,12 +473,9 @@ void FastISel::removeDeadCode(MachineBasicBlock::iterator I,
}
FastISel::SavePoint FastISel::enterLocalValueArea() {
- MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt;
- DebugLoc OldDL = DbgLoc;
+ SavePoint OldInsertPt = FuncInfo.InsertPt;
recomputeInsertPt();
- DbgLoc = DebugLoc();
- SavePoint SP = {OldInsertPt, OldDL};
- return SP;
+ return OldInsertPt;
}
void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) {
@@ -591,8 +483,7 @@ void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) {
LastLocalValue = &*std::prev(FuncInfo.InsertPt);
// Restore the previous insert position.
- FuncInfo.InsertPt = OldInsertPt.InsertPt;
- DbgLoc = OldInsertPt.DL;
+ FuncInfo.InsertPt = OldInsertPt;
}
bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) {
@@ -1316,11 +1207,6 @@ bool FastISel::selectCall(const User *I) {
// Handle simple inline asms.
if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getCalledOperand())) {
- // If the inline asm has side effects, then make sure that no local value
- // lives across by flushing the local value map.
- if (IA->hasSideEffects())
- flushLocalValueMap();
-
// Don't attempt to handle constraints.
if (!IA->getConstraintString().empty())
return false;
@@ -1350,15 +1236,6 @@ bool FastISel::selectCall(const User *I) {
if (const auto *II = dyn_cast<IntrinsicInst>(Call))
return selectIntrinsicCall(II);
- // Usually, it does not make sense to initialize a value,
- // make an unrelated function call and use the value, because
- // it tends to be spilled on the stack. So, we move the pointer
- // to the last local value to the beginning of the block, so that
- // all the values which have already been materialized,
- // appear after the call. It also makes sense to skip intrinsics
- // since they tend to be inlined.
- flushLocalValueMap();
-
return lowerCall(Call);
}
@@ -1375,6 +1252,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
case Intrinsic::sideeffect:
// Neither does the assume intrinsic; it's also OK not to codegen its operand.
case Intrinsic::assume:
+ // Neither does the llvm.experimental.noalias.scope.decl intrinsic
+ case Intrinsic::experimental_noalias_scope_decl:
return true;
case Intrinsic::dbg_declare: {
const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
@@ -1643,6 +1522,11 @@ void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue)
}
bool FastISel::selectInstruction(const Instruction *I) {
+ // Flush the local value map before starting each instruction.
+ // This improves locality and debugging, and can reduce spills.
+ // Reuse of values across IR instructions is relatively uncommon.
+ flushLocalValueMap();
+
MachineInstr *SavedLastLocalValue = getLastLocalValue();
// Just before the terminator instruction, insert instructions to
// feed PHI nodes in successor blocks.
@@ -1789,13 +1673,13 @@ bool FastISel::selectFNeg(const User *I, const Value *In) {
return false;
Register IntResultReg = fastEmit_ri_(
- IntVT.getSimpleVT(), ISD::XOR, IntReg, /*IsKill=*/true,
+ IntVT.getSimpleVT(), ISD::XOR, IntReg, /*Op0IsKill=*/true,
UINT64_C(1) << (VT.getSizeInBits() - 1), IntVT.getSimpleVT());
if (!IntResultReg)
return false;
ResultReg = fastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(), ISD::BITCAST,
- IntResultReg, /*IsKill=*/true);
+ IntResultReg, /*Op0IsKill=*/true);
if (!ResultReg)
return false;
@@ -1851,13 +1735,8 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) {
return selectBinaryOp(I, ISD::FADD);
case Instruction::Sub:
return selectBinaryOp(I, ISD::SUB);
- case Instruction::FSub: {
- // FNeg is currently represented in LLVM IR as a special case of FSub.
- Value *X;
- if (match(I, m_FNeg(m_Value(X))))
- return selectFNeg(I, X);
+ case Instruction::FSub:
return selectBinaryOp(I, ISD::FSUB);
- }
case Instruction::Mul:
return selectBinaryOp(I, ISD::MUL);
case Instruction::FMul:
@@ -2353,9 +2232,9 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB);
- // Set the DebugLoc for the copy. Prefer the location of the operand
- // if there is one; use the location of the PHI otherwise.
- DbgLoc = PN.getDebugLoc();
+ // Set the DebugLoc for the copy. Use the location of the operand if
+ // there is one; otherwise no location, flushLocalValueMap will fix it.
+ DbgLoc = DebugLoc();
if (const auto *Inst = dyn_cast<Instruction>(PHIOp))
DbgLoc = Inst->getDebugLoc();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 5cf83cff3a90..32a4f60df097 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -197,7 +197,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// Look for inline asm that clobbers the SP register.
if (auto *Call = dyn_cast<CallBase>(&I)) {
if (Call->isInlineAsm()) {
- unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
+ Register SP = TLI->getStackPointerRegisterToSaveRestore();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
std::vector<TargetLowering::AsmOperandInfo> Ops =
TLI->ParseConstraints(Fn->getParent()->getDataLayout(), TRI,
@@ -360,7 +360,7 @@ void FunctionLoweringInfo::clear() {
RegFixups.clear();
RegsWithFixups.clear();
StatepointStackSlots.clear();
- StatepointSpillMaps.clear();
+ StatepointRelocationMaps.clear();
PreferredExtendType.clear();
}
@@ -458,8 +458,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
APInt Val = CI->getValue().zextOrTrunc(BitWidth);
DestLOI.NumSignBits = Val.getNumSignBits();
- DestLOI.Known.Zero = ~Val;
- DestLOI.Known.One = Val;
+ DestLOI.Known = KnownBits::makeConstant(Val);
} else {
assert(ValueMap.count(V) && "V should have been placed in ValueMap when its"
"CopyToReg node was created.");
@@ -509,8 +508,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
return;
}
DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, SrcLOI->NumSignBits);
- DestLOI.Known.Zero &= SrcLOI->Known.Zero;
- DestLOI.Known.One &= SrcLOI->Known.One;
+ DestLOI.Known = KnownBits::commonBits(DestLOI.Known, SrcLOI->Known);
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 0e4e99214aa2..a5978711b871 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/PseudoProbe.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -200,6 +201,8 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
bool HasVRegVariadicDefs = !MF->getTarget().usesPhysRegsForValues() &&
II.isVariadic() && II.variadicOpsAreDefs();
unsigned NumVRegs = HasVRegVariadicDefs ? NumResults : II.getNumDefs();
+ if (Node->getMachineOpcode() == TargetOpcode::STATEPOINT)
+ NumVRegs = NumResults;
for (unsigned i = 0; i < NumVRegs; ++i) {
// If the specific node value is only used by a CopyToReg and the dest reg
// is a vreg in the same register class, use the CopyToReg'd destination
@@ -693,6 +696,11 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
return &*MIB;
}
+ // Attempt to produce a DBG_INSTR_REF if we've been asked to.
+ if (EmitDebugInstrRefs)
+ if (auto *InstrRef = EmitDbgInstrRef(SD, VRBaseMap))
+ return InstrRef;
+
if (SD->getKind() == SDDbgValue::FRAMEIX) {
// Stack address; this needs to be lowered in target-dependent fashion.
// EmitTargetCodeForFrameDebugValue is responsible for allocation.
@@ -760,6 +768,63 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
}
MachineInstr *
+InstrEmitter::EmitDbgInstrRef(SDDbgValue *SD,
+ DenseMap<SDValue, Register> &VRBaseMap) {
+ // Instruction referencing is still in a prototype state: for now we're only
+ // going to support SDNodes within a block. Copies are not supported, they
+ // don't actually define a value.
+ if (SD->getKind() != SDDbgValue::SDNODE)
+ return nullptr;
+
+ SDNode *Node = SD->getSDNode();
+ SDValue Op = SDValue(Node, SD->getResNo());
+ DenseMap<SDValue, Register>::iterator I = VRBaseMap.find(Op);
+ if (I==VRBaseMap.end())
+ return nullptr; // undef value: let EmitDbgValue produce a DBG_VALUE $noreg.
+
+ MDNode *Var = SD->getVariable();
+ MDNode *Expr = SD->getExpression();
+ DebugLoc DL = SD->getDebugLoc();
+
+ // Try to pick out a defining instruction at this point.
+ unsigned VReg = getVR(Op, VRBaseMap);
+ MachineInstr *ResultInstr = nullptr;
+
+ // No definition corresponds to scenarios where a vreg is live-in to a block,
+ // and doesn't have a defining instruction (yet). This can be patched up
+ // later; at this early stage of implementation, fall back to using DBG_VALUE.
+ if (!MRI->hasOneDef(VReg))
+ return nullptr;
+
+ MachineInstr &DefMI = *MRI->def_instr_begin(VReg);
+ // Some target specific opcodes can become copies. As stated above, we're
+ // ignoring those for now.
+ if (DefMI.isCopy() || DefMI.getOpcode() == TargetOpcode::SUBREG_TO_REG)
+ return nullptr;
+
+ const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_INSTR_REF);
+ auto MIB = BuildMI(*MF, DL, RefII);
+
+ // Find the operand which defines the specified VReg.
+ unsigned OperandIdx = 0;
+ for (const auto &MO : DefMI.operands()) {
+ if (MO.isReg() && MO.isDef() && MO.getReg() == VReg)
+ break;
+ ++OperandIdx;
+ }
+ assert(OperandIdx < DefMI.getNumOperands());
+
+ // Make the DBG_INSTR_REF refer to that instruction, and that operand.
+ unsigned InstrNum = DefMI.getDebugInstrNum();
+ MIB.addImm(InstrNum);
+ MIB.addImm(OperandIdx);
+ MIB.addMetadata(Var);
+ MIB.addMetadata(Expr);
+ ResultInstr = &*MIB;
+ return ResultInstr;
+}
+
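
The point of DBG_INSTR_REF is that it names a value as "operand K of instruction number N" instead of naming a vreg, so the variable location survives later rewrites of the register. A toy encoding of the operands the builder adds above (the DIExpression is omitted from the sketch):

    #include <cstdio>

    // Stand-in for the operand triple a DBG_INSTR_REF carries.
    struct DbgInstrRef {
      unsigned InstrNum;  // MachineInstr::getDebugInstrNum() of the def
      unsigned OpIdx;     // which operand of that instruction defines it
      const char *Var;    // the variable being described
    };

    int main() {
      DbgInstrRef R{7, 0, "x"};  // "x lives in operand 0 of instruction 7"
      printf("DBG_INSTR_REF %u, %u, !\"%s\"\n", R.InstrNum, R.OpIdx, R.Var);
      return 0;
    }
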
+MachineInstr *
InstrEmitter::EmitDbgLabel(SDDbgLabel *SD) {
MDNode *Label = SD->getLabel();
DebugLoc DL = SD->getDebugLoc();
@@ -821,6 +886,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
NumDefs = NumResults;
}
ScratchRegs = TLI->getScratchRegisters((CallingConv::ID) CC);
+ } else if (Opc == TargetOpcode::STATEPOINT) {
+ NumDefs = NumResults;
}
unsigned NumImpUses = 0;
@@ -970,6 +1037,22 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
if (!UsedRegs.empty() || II.getImplicitDefs() || II.hasOptionalDef())
MIB->setPhysRegsDeadExcept(UsedRegs, *TRI);
+ // STATEPOINT is too 'dynamic' to have meaningful machine description.
+ // We have to manually tie operands.
+ if (Opc == TargetOpcode::STATEPOINT && NumDefs > 0) {
+ assert(!HasPhysRegOuts && "STATEPOINT mishandled");
+ MachineInstr *MI = MIB;
+ unsigned Def = 0;
+ int First = StatepointOpers(MI).getFirstGCPtrIdx();
+ assert(First > 0 && "Statepoint has Defs but no GC ptr list");
+ unsigned Use = (unsigned)First;
+ while (Def < NumDefs) {
+ if (MI->getOperand(Use).isReg())
+ MI->tieOperands(Def++, Use);
+ Use = StackMaps::getNextMetaArgIdx(MI, Use);
+ }
+ }
+
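
The tying walk pairs each statepoint def with the next register operand in the GC-pointer section, skipping immediates and frame indices among the meta arguments. Stripped to the iteration pattern, with getNextMetaArgIdx replaced by a linear scan for the sketch:

    #include <cstdio>
    #include <vector>

    int main() {
      // true = register operand, false = immediate/frame-index meta arg.
      std::vector<bool> IsReg = {false, true, false, true, true};
      unsigned NumDefs = 3, Def = 0;
      for (unsigned Use = 0; Use < IsReg.size() && Def < NumDefs; ++Use)
        if (IsReg[Use])
          printf("tie def %u <-> use operand %u\n", Def++, Use);
      return 0;
    }
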
// Run post-isel target hook to adjust this instruction if needed.
if (II.hasPostISelHook())
TLI->AdjustInstrPostInstrSelection(*MIB, Node);
@@ -1042,6 +1125,20 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
break;
}
+ case ISD::PSEUDO_PROBE: {
+ unsigned TarOp = TargetOpcode::PSEUDO_PROBE;
+ auto Guid = cast<PseudoProbeSDNode>(Node)->getGuid();
+ auto Index = cast<PseudoProbeSDNode>(Node)->getIndex();
+ auto Attr = cast<PseudoProbeSDNode>(Node)->getAttributes();
+
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TarOp))
+ .addImm(Guid)
+ .addImm(Index)
+ .addImm((uint8_t)PseudoProbeType::Block)
+ .addImm(Attr);
+ break;
+ }
+
case ISD::INLINEASM:
case ISD::INLINEASM_BR: {
unsigned NumOps = Node->getNumOperands();
@@ -1157,10 +1254,12 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
/// InstrEmitter - Construct an InstrEmitter and set it to start inserting
/// at the given position in the given block.
-InstrEmitter::InstrEmitter(MachineBasicBlock *mbb,
+InstrEmitter::InstrEmitter(const TargetMachine &TM, MachineBasicBlock *mbb,
MachineBasicBlock::iterator insertpos)
: MF(mbb->getParent()), MRI(&MF->getRegInfo()),
TII(MF->getSubtarget().getInstrInfo()),
TRI(MF->getSubtarget().getRegisterInfo()),
TLI(MF->getSubtarget().getTargetLowering()), MBB(mbb),
- InsertPos(insertpos) {}
+ InsertPos(insertpos) {
+ EmitDebugInstrRefs = TM.Options.ValueTrackingVariableLocations;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
index c3567eae9161..09658b8143fe 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -26,6 +26,7 @@ class MCInstrDesc;
class SDDbgLabel;
class SDDbgValue;
class TargetLowering;
+class TargetMachine;
class LLVM_LIBRARY_VISIBILITY InstrEmitter {
MachineFunction *MF;
@@ -37,6 +38,9 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter {
MachineBasicBlock *MBB;
MachineBasicBlock::iterator InsertPos;
+ /// Should we try to produce DBG_INSTR_REF instructions?
+ bool EmitDebugInstrRefs;
+
/// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an
/// implicit physical register output.
void EmitCopyFromReg(SDNode *Node, unsigned ResNo,
@@ -109,6 +113,11 @@ public:
MachineInstr *EmitDbgValue(SDDbgValue *SD,
DenseMap<SDValue, Register> &VRBaseMap);
+ /// Attempt to emit a dbg_value as a DBG_INSTR_REF. May fail and return
+ /// nullptr, in which case we fall back to plain EmitDbgValue.
+ MachineInstr *EmitDbgInstrRef(SDDbgValue *SD,
+ DenseMap<SDValue, Register> &VRBaseMap);
+
/// Generate machine instruction for a dbg_label node.
MachineInstr *EmitDbgLabel(SDDbgLabel *SD);
@@ -130,7 +139,8 @@ public:
/// InstrEmitter - Construct an InstrEmitter and set it to start inserting
/// at the given position in the given block.
- InstrEmitter(MachineBasicBlock *mbb, MachineBasicBlock::iterator insertpos);
+ InstrEmitter(const TargetMachine &TM, MachineBasicBlock *mbb,
+ MachineBasicBlock::iterator insertpos);
private:
void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 6a6004c158bb..62d7191036ca 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -173,14 +173,17 @@ private:
SDValue NewIntValue) const;
SDValue ExpandFCOPYSIGN(SDNode *Node) const;
SDValue ExpandFABS(SDNode *Node) const;
+ SDValue ExpandFNEG(SDNode *Node) const;
SDValue ExpandLegalINT_TO_FP(SDNode *Node, SDValue &Chain);
void PromoteLegalINT_TO_FP(SDNode *N, const SDLoc &dl,
SmallVectorImpl<SDValue> &Results);
void PromoteLegalFP_TO_INT(SDNode *N, const SDLoc &dl,
SmallVectorImpl<SDValue> &Results);
+ SDValue PromoteLegalFP_TO_INT_SAT(SDNode *Node, const SDLoc &dl);
SDValue ExpandBITREVERSE(SDValue Op, const SDLoc &dl);
SDValue ExpandBSWAP(SDValue Op, const SDLoc &dl);
+ SDValue ExpandPARITY(SDValue Op, const SDLoc &dl);
SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
SDValue ExpandInsertToVectorThroughStack(SDValue Op);
@@ -428,7 +431,6 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
LLVM_DEBUG(dbgs() << "Optimizing float store operations\n");
// Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
- // FIXME: We shouldn't do this for TargetConstantFP's.
// FIXME: move this to the DAG Combiner! Note that we can't regress due
// to phase ordering between legalized code and the dag combiner. This
// probably means that we need to integrate dag combiner and legalizer
@@ -436,10 +438,16 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
// We generally can't do this one for long doubles.
SDValue Chain = ST->getChain();
SDValue Ptr = ST->getBasePtr();
+ SDValue Value = ST->getValue();
MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
AAMDNodes AAInfo = ST->getAAInfo();
SDLoc dl(ST);
- if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
+
+ // Don't optimise TargetConstantFP
+ if (Value.getOpcode() == ISD::TargetConstantFP)
+ return SDValue();
+
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
if (CFP->getValueType(0) == MVT::f32 &&
TLI.isTypeLegal(MVT::i32)) {
SDValue Con = DAG.getConstant(CFP->getValueAPF().
@@ -470,7 +478,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(),
ST->getOriginalAlign(), MMOFlags, AAInfo);
- Ptr = DAG.getMemBasePlusOffset(Ptr, 4, dl);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), dl);
Hi = DAG.getStore(Chain, dl, Hi, Ptr,
ST->getPointerInfo().getWithOffset(4),
ST->getOriginalAlign(), MMOFlags, AAInfo);
@@ -479,7 +487,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
}
}
}
- return SDValue(nullptr, 0);
+ return SDValue();
}
void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
@@ -540,28 +548,29 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
LLVM_DEBUG(dbgs() << "Legalizing truncating store operations\n");
SDValue Value = ST->getValue();
EVT StVT = ST->getMemoryVT();
- unsigned StWidth = StVT.getSizeInBits();
+ TypeSize StWidth = StVT.getSizeInBits();
+ TypeSize StSize = StVT.getStoreSizeInBits();
auto &DL = DAG.getDataLayout();
- if (StWidth != StVT.getStoreSizeInBits()) {
+ if (StWidth != StSize) {
// Promote to a byte-sized store with upper bits zero if not
// storing an integral number of bytes. For example, promote
// TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
- EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
- StVT.getStoreSizeInBits());
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StSize.getFixedSize());
Value = DAG.getZeroExtendInReg(Value, dl, StVT);
SDValue Result =
DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT,
ST->getOriginalAlign(), MMOFlags, AAInfo);
ReplaceNode(SDValue(Node, 0), Result);
- } else if (StWidth & (StWidth - 1)) {
+ } else if (!StVT.isVector() && !isPowerOf2_64(StWidth.getFixedSize())) {
// If not storing a power-of-2 number of bits, expand as two stores.
assert(!StVT.isVector() && "Unsupported truncstore!");
- unsigned LogStWidth = Log2_32(StWidth);
+ unsigned StWidthBits = StWidth.getFixedSize();
+ unsigned LogStWidth = Log2_32(StWidthBits);
assert(LogStWidth < 32);
unsigned RoundWidth = 1 << LogStWidth;
- assert(RoundWidth < StWidth);
- unsigned ExtraWidth = StWidth - RoundWidth;
+ assert(RoundWidth < StWidthBits);
+ unsigned ExtraWidth = StWidthBits - RoundWidth;
assert(ExtraWidth < RoundWidth);
assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
"Store size not an integral number of bytes!");
@@ -578,7 +587,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
// Store the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
- Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
Hi = DAG.getNode(
ISD::SRL, dl, Value.getValueType(), Value,
DAG.getConstant(RoundWidth, dl,
@@ -718,7 +727,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
LLVM_DEBUG(dbgs() << "Legalizing extending load operation\n");
EVT SrcVT = LD->getMemoryVT();
- unsigned SrcWidth = SrcVT.getSizeInBits();
+ TypeSize SrcWidth = SrcVT.getSizeInBits();
MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
AAMDNodes AAInfo = LD->getAAInfo();
@@ -764,14 +773,15 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
Value = Result;
Chain = Ch;
- } else if (SrcWidth & (SrcWidth - 1)) {
+ } else if (!isPowerOf2_64(SrcWidth.getKnownMinSize())) {
// If not loading a power-of-2 number of bits, expand as two loads.
assert(!SrcVT.isVector() && "Unsupported extload!");
- unsigned LogSrcWidth = Log2_32(SrcWidth);
+ unsigned SrcWidthBits = SrcWidth.getFixedSize();
+ unsigned LogSrcWidth = Log2_32(SrcWidthBits);
assert(LogSrcWidth < 32);
unsigned RoundWidth = 1 << LogSrcWidth;
- assert(RoundWidth < SrcWidth);
- unsigned ExtraWidth = SrcWidth - RoundWidth;
+ assert(RoundWidth < SrcWidthBits);
+ unsigned ExtraWidth = SrcWidthBits - RoundWidth;
assert(ExtraWidth < RoundWidth);
assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
"Load size not an integral number of bytes!");
@@ -790,7 +800,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// Load the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
- Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
ExtraVT, LD->getOriginalAlign(), MMOFlags, AAInfo);
@@ -818,7 +828,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// Load the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
- Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
ExtraVT, LD->getOriginalAlign(), MMOFlags, AAInfo);
@@ -1103,6 +1113,18 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
// They'll be converted to Copy(To/From)Reg.
Action = TargetLowering::Legal;
break;
+ case ISD::UBSANTRAP:
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ if (Action == TargetLowering::Expand) {
+ // replace ISD::UBSANTRAP with ISD::TRAP
+ SDValue NewVal;
+ NewVal = DAG.getNode(ISD::TRAP, SDLoc(Node), Node->getVTList(),
+ Node->getOperand(0));
+ ReplaceNode(Node, NewVal.getNode());
+ LegalizeOp(NewVal.getNode());
+ return;
+ }
+ break;
case ISD::DEBUGTRAP:
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
if (Action == TargetLowering::Expand) {
@@ -1118,10 +1140,13 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::SADDSAT:
case ISD::UADDSAT:
case ISD::SSUBSAT:
- case ISD::USUBSAT: {
+ case ISD::USUBSAT:
+ case ISD::SSHLSAT:
+ case ISD::USHLSAT:
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
break;
- }
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
@@ -1159,6 +1184,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = TLI.getOperationAction(
Node->getOpcode(), Node->getOperand(0).getValueType());
break;
+ case ISD::VECREDUCE_SEQ_FADD:
+ Action = TLI.getOperationAction(
+ Node->getOpcode(), Node->getOperand(1).getValueType());
+ break;
default:
if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
Action = TargetLowering::Legal;
@@ -1411,6 +1440,12 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
SmallVector<SDValue, 8> Stores;
unsigned TypeByteSize = MemVT.getSizeInBits() / 8;
assert(TypeByteSize > 0 && "Vector element type too small for stack store!");
+
+ // If the destination vector element type of a BUILD_VECTOR is narrower than
+ // the source element type, only store the bits necessary.
+ bool Truncate = isa<BuildVectorSDNode>(Node) &&
+ MemVT.bitsLT(Node->getOperand(0).getValueType());
+
// Store (in the right endianness) the elements to memory.
for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
// Ignore undef elements.
@@ -1418,11 +1453,9 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
unsigned Offset = TypeByteSize*i;
- SDValue Idx = DAG.getMemBasePlusOffset(FIPtr, Offset, dl);
+ SDValue Idx = DAG.getMemBasePlusOffset(FIPtr, TypeSize::Fixed(Offset), dl);
- // If the destination vector element type is narrower than the source
- // element type, only store the bits necessary.
- if (MemVT.bitsLT(Node->getOperand(i).getValueType()))
+ if (Truncate)
Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
Node->getOperand(i), Idx,
PtrInfo.getWithOffset(Offset), MemVT));
@@ -1448,7 +1481,7 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State,
const SDLoc &DL,
SDValue Value) const {
EVT FloatVT = Value.getValueType();
- unsigned NumBits = FloatVT.getSizeInBits();
+ unsigned NumBits = FloatVT.getScalarSizeInBits();
State.FloatVT = FloatVT;
EVT IVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
// Convert to an integer of the same size.
@@ -1480,8 +1513,9 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State,
State.IntPointerInfo = State.FloatPointerInfo;
} else {
// Advance the pointer so that the loaded byte will contain the sign bit.
- unsigned ByteOffset = (FloatVT.getSizeInBits() / 8) - 1;
- IntPtr = DAG.getMemBasePlusOffset(StackPtr, ByteOffset, DL);
+ unsigned ByteOffset = (NumBits / 8) - 1;
+ IntPtr =
+ DAG.getMemBasePlusOffset(StackPtr, TypeSize::Fixed(ByteOffset), DL);
State.IntPointerInfo = MachinePointerInfo::getFixedStack(MF, FI,
ByteOffset);
}
@@ -1489,7 +1523,7 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State,
State.IntPtr = IntPtr;
State.IntValue = DAG.getExtLoad(ISD::EXTLOAD, DL, LoadTy, State.Chain, IntPtr,
State.IntPointerInfo, MVT::i8);
- State.SignMask = APInt::getOneBitSet(LoadTy.getSizeInBits(), 7);
+ State.SignMask = APInt::getOneBitSet(LoadTy.getScalarSizeInBits(), 7);
State.SignBit = 7;
}
@@ -1544,7 +1578,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const {
// Get the signbit at the right position for MagAsInt.
int ShiftAmount = SignAsInt.SignBit - MagAsInt.SignBit;
EVT ShiftVT = IntVT;
- if (SignBit.getValueSizeInBits() < ClearedSign.getValueSizeInBits()) {
+ if (SignBit.getScalarValueSizeInBits() <
+ ClearedSign.getScalarValueSizeInBits()) {
SignBit = DAG.getNode(ISD::ZERO_EXTEND, DL, MagVT, SignBit);
ShiftVT = MagVT;
}
@@ -1555,7 +1590,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const {
SDValue ShiftCnst = DAG.getConstant(-ShiftAmount, DL, ShiftVT);
SignBit = DAG.getNode(ISD::SHL, DL, ShiftVT, SignBit, ShiftCnst);
}
- if (SignBit.getValueSizeInBits() > ClearedSign.getValueSizeInBits()) {
+ if (SignBit.getScalarValueSizeInBits() >
+ ClearedSign.getScalarValueSizeInBits()) {
SignBit = DAG.getNode(ISD::TRUNCATE, DL, MagVT, SignBit);
}
@@ -1564,6 +1600,22 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const {
return modifySignAsInt(MagAsInt, DL, CopiedSign);
}
+SDValue SelectionDAGLegalize::ExpandFNEG(SDNode *Node) const {
+ // Get the sign bit as an integer.
+ SDLoc DL(Node);
+ FloatSignAsInt SignAsInt;
+ getSignAsIntValue(SignAsInt, DL, Node->getOperand(0));
+ EVT IntVT = SignAsInt.IntValue.getValueType();
+
+ // Flip the sign.
+ SDValue SignMask = DAG.getConstant(SignAsInt.SignMask, DL, IntVT);
+ SDValue SignFlip =
+ DAG.getNode(ISD::XOR, DL, IntVT, SignAsInt.IntValue, SignMask);
+
+ // Convert back to float.
+ return modifySignAsInt(SignAsInt, DL, SignFlip);
+}
+
SDValue SelectionDAGLegalize::ExpandFABS(SDNode *Node) const {
SDLoc DL(Node);
SDValue Value = Node->getOperand(0);
@@ -1587,7 +1639,7 @@ SDValue SelectionDAGLegalize::ExpandFABS(SDNode *Node) const {
void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
SmallVectorImpl<SDValue> &Results) {
- unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
+ Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
" not tell us which reg is the stack pointer!");
SDLoc dl(Node);
@@ -1681,21 +1733,41 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(
unsigned Opc = 0;
switch (CCCode) {
default: llvm_unreachable("Don't know how to expand this condition!");
+ case ISD::SETUO:
+ if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
+ CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR;
+ break;
+ }
+ assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
+ "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
+ NeedInvert = true;
+ LLVM_FALLTHROUGH;
case ISD::SETO:
assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT)
&& "If SETO is expanded, SETOEQ must be legal!");
CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break;
- case ISD::SETUO:
- assert(TLI.isCondCodeLegal(ISD::SETUNE, OpVT)
- && "If SETUO is expanded, SETUNE must be legal!");
- CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR; break;
+ case ISD::SETONE:
+ case ISD::SETUEQ:
+ // If the SETUO or SETO CC isn't legal, we might be able to use
+ // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
+ // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
+ // the operands.
+ CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
+ if (!TLI.isCondCodeLegal(CC2, OpVT) &&
+ (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
+ TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
+ CC1 = ISD::SETOGT;
+ CC2 = ISD::SETOLT;
+ Opc = ISD::OR;
+ NeedInvert = ((unsigned)CCCode & 0x8U);
+ break;
+ }
+ LLVM_FALLTHROUGH;
case ISD::SETOEQ:
case ISD::SETOGT:
case ISD::SETOGE:
case ISD::SETOLT:
case ISD::SETOLE:
- case ISD::SETONE:
- case ISD::SETUEQ:
case ISD::SETUNE:
case ISD::SETUGT:
case ISD::SETUGE:
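
The SETONE/SETUEQ rewrite rests on a NaN identity: OGT and OLT are both false when either operand is unordered, so (a OGT b) || (a OLT b) is exactly ONE, and its negation is UEQ — which is why NeedInvert is set precisely when the condition's unordered bit (0x8 in the CondCode encoding) is set. Checked on plain floats:

    #include <cmath>
    #include <cstdio>

    int main() {
      float Pairs[][2] = {{1, 2}, {2, 2}, {NAN, 2}};
      for (auto &P : Pairs) {
        bool One = (P[0] > P[1]) || (P[0] < P[1]);  // ordered and unequal
        printf("ONE=%d UEQ=%d\n", One, !One);       // 1/0, 0/1, 0/1
      }
      return 0;
    }
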
@@ -1727,12 +1799,16 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(
if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
// If we aren't the ordered or unorder operation,
// then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
- SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
- SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain,
+ IsSignaling);
+ SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain,
+ IsSignaling);
} else {
// Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
- SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
- SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain,
+ IsSignaling);
+ SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain,
+ IsSignaling);
}
if (Chain)
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
@@ -1758,27 +1834,34 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
EVT DestVT, const SDLoc &dl,
SDValue Chain) {
+ unsigned SrcSize = SrcOp.getValueSizeInBits();
+ unsigned SlotSize = SlotVT.getSizeInBits();
+ unsigned DestSize = DestVT.getSizeInBits();
+ Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
+ Align DestAlign = DAG.getDataLayout().getPrefTypeAlign(DestType);
+
+ // Don't convert with stack if the load/store is expensive.
+ if ((SrcSize > SlotSize &&
+ !TLI.isTruncStoreLegalOrCustom(SrcOp.getValueType(), SlotVT)) ||
+ (SlotSize < DestSize &&
+ !TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, DestVT, SlotVT)))
+ return SDValue();
+
// Create the stack frame object.
- unsigned SrcAlign = DAG.getDataLayout().getPrefTypeAlignment(
+ Align SrcAlign = DAG.getDataLayout().getPrefTypeAlign(
SrcOp.getValueType().getTypeForEVT(*DAG.getContext()));
- SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign);
+ SDValue FIPtr = DAG.CreateStackTemporary(SlotVT.getStoreSize(), SrcAlign);
FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);
int SPFI = StackPtrFI->getIndex();
MachinePointerInfo PtrInfo =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
- unsigned SrcSize = SrcOp.getValueSizeInBits();
- unsigned SlotSize = SlotVT.getSizeInBits();
- unsigned DestSize = DestVT.getSizeInBits();
- Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
- unsigned DestAlign = DAG.getDataLayout().getPrefTypeAlignment(DestType);
-
  // Emit a store to the stack slot. Use a truncstore if the input value is
  // larger than SlotVT.
SDValue Store;
- if (SrcSize > SlotSize)
+ if (SrcSize > SlotSize)
Store = DAG.getTruncStore(Chain, dl, SrcOp, FIPtr, PtrInfo,
SlotVT, SrcAlign);
else {
@@ -1790,7 +1873,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
// Result is a load from the stack slot.
if (SlotSize == DestSize)
return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, DestAlign);
-
+
assert(SlotSize < DestSize && "Unknown extension!");
return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, PtrInfo, SlotVT,
DestAlign);
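EmitStackConvert realizes the conversion as a store/reload through a stack temporary; when all three sizes match (the BITCAST case) the scalar analogue is just a memory round-trip. A minimal sketch, not the DAG code itself:

    #include <cstdint>
    #include <cstring>

    // Reinterpret an f32 as i32 the way the stack slot does: a plain
    // store at the source type followed by a load at the destination type.
    uint32_t bitcastViaStack(float Src) {
      unsigned char Slot[sizeof(float)];        // the stack temporary
      std::memcpy(Slot, &Src, sizeof(Src));     // the store
      uint32_t Dest;
      std::memcpy(&Dest, Slot, sizeof(Dest));   // the load
      return Dest;
    }

The truncstore and extload paths are the same pattern for the SrcSize > SlotSize and SlotSize < DestSize cases respectively.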
@@ -2111,7 +2194,7 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
if (Node->isStrictFPOpcode()) {
EVT RetVT = Node->getValueType(0);
- SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end());
+ SmallVector<SDValue, 4> Ops(drop_begin(Node->ops()));
TargetLowering::MakeLibCallOptions CallOptions;
// FIXME: This doesn't support tail calls.
std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT,
@@ -2361,7 +2444,11 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node,
// TODO: Should any fast-math-flags be set for the created nodes?
LLVM_DEBUG(dbgs() << "Legalizing INT_TO_FP\n");
- if (SrcVT == MVT::i32 && TLI.isTypeLegal(MVT::f64)) {
+ if (SrcVT == MVT::i32 && TLI.isTypeLegal(MVT::f64) &&
+ (DestVT.bitsLE(MVT::f64) ||
+ TLI.isOperationLegal(Node->isStrictFPOpcode() ? ISD::STRICT_FP_EXTEND
+ : ISD::FP_EXTEND,
+ DestVT))) {
LLVM_DEBUG(dbgs() << "32-bit [signed|unsigned] integer to float/double "
"expansion\n");
@@ -2388,7 +2475,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node,
SDValue Store1 = DAG.getStore(MemChain, dl, Lo, StackSlot,
MachinePointerInfo());
// Store the hi of the constructed double.
- SDValue HiPtr = DAG.getMemBasePlusOffset(StackSlot, 4, dl);
+ SDValue HiPtr = DAG.getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), dl);
SDValue Store2 =
DAG.getStore(MemChain, dl, Hi, HiPtr, MachinePointerInfo());
MemChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
@@ -2423,16 +2510,24 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node,
}
return Result;
}
- // Code below here assumes !isSigned without checking again.
- assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
+
+ if (isSigned)
+ return SDValue();
// TODO: Generalize this for use with other types.
- if ((SrcVT == MVT::i32 || SrcVT == MVT::i64) && DestVT == MVT::f32) {
- LLVM_DEBUG(dbgs() << "Converting unsigned i32/i64 to f32\n");
+ if (((SrcVT == MVT::i32 || SrcVT == MVT::i64) && DestVT == MVT::f32) ||
+ (SrcVT == MVT::i64 && DestVT == MVT::f64)) {
+ LLVM_DEBUG(dbgs() << "Converting unsigned i32/i64 to f32/f64\n");
// For unsigned conversions, convert them to signed conversions using the
// algorithm from the x86_64 __floatundisf in compiler_rt. That method
// should be valid for i32->f32 as well.
+    // More generally, this transform should be valid if there are 3 more
+    // bits in the integer type than the significand. Rounding uses the first
+    // bit after the width of the significand and the OR of all bits after
+    // that. So we need to be able to OR the shifted-out bit into one of the
+    // bits that participate in the OR.
+
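The fast/slow select that follows implements the same algorithm as compiler-rt's __floatundisf; a scalar sketch of the idea (assuming round-to-nearest-even):

    #include <cstdint>

    float floatundisf(uint64_t X) {
      // Fast path: sign bit clear, so a signed conversion is already exact.
      if ((int64_t)X >= 0)
        return (float)(int64_t)X;
      // Slow path: halve the value, OR the shifted-out bit back in so it
      // still participates in rounding as a sticky bit, convert, then double.
      uint64_t Shifted = (X >> 1) | (X & 1);
      float F = (float)(int64_t)Shifted;
      return F + F;
    }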
// TODO: This really should be implemented using a branch rather than a
// select. We happen to get lucky and machinesink does the right
// thing most of the time. This would be a good candidate for a
@@ -2476,6 +2571,11 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node,
return DAG.getSelect(dl, DestVT, SignBitTest, Slow, Fast);
}
+ // Don't expand it if there isn't cheap fadd.
+ if (!TLI.isOperationLegalOrCustom(
+ Node->isStrictFPOpcode() ? ISD::STRICT_FADD : ISD::FADD, DestVT))
+ return SDValue();
+
// The following optimization is valid only if every value in SrcVT (when
// treated as signed) is representable in DestVT. Check that the mantissa
  // size of DestVT is at least the number of bits in SrcVT - 1.
@@ -2502,7 +2602,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node,
// offset depending on the data type.
uint64_t FF;
switch (SrcVT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unsupported integer type!");
+ default:
+ return SDValue();
case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float)
case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float)
case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float)
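The FF table implements the classic fudge-factor trick: convert as signed, then add 2^N back when the sign bit was set (the 0x4F800000ULL entry is 2^32 encoded as an f32). A scalar sketch for i32 -> f64:

    #include <cstdint>

    double uitofp_i32(uint32_t X) {
      double D = (double)(int32_t)X;   // signed conversion: off by exactly
                                       // 2^32 when the sign bit is set
      if ((int32_t)X < 0)
        D += 4294967296.0;             // add 2^32 back
      return D;
    }

The DAG version folds the two cases into a single FADD by loading either 0 or 2^N from a constant-pool slot selected by the sign bit.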
@@ -2657,6 +2758,30 @@ void SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDNode *N, const SDLoc &dl,
Results.push_back(Operation.getValue(1));
}
+/// Promote FP_TO_*INT_SAT operation to a larger result type. At this point
+/// the result and operand types are legal and there must be a legal
+/// FP_TO_*INT_SAT operation for a larger result type.
+SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT_SAT(SDNode *Node,
+ const SDLoc &dl) {
+ unsigned Opcode = Node->getOpcode();
+
+ // Scan for the appropriate larger type to use.
+ EVT NewOutTy = Node->getValueType(0);
+ while (true) {
+ NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy + 1);
+ assert(NewOutTy.isInteger() && "Ran out of possibilities!");
+
+ if (TLI.isOperationLegalOrCustom(Opcode, NewOutTy))
+ break;
+ }
+
+  // The saturation width is determined by the second operand, so we don't
+  // have to perform any fixup and can directly truncate the result.
+ SDValue Result = DAG.getNode(Opcode, dl, NewOutTy, Node->getOperand(0),
+ Node->getOperand(1));
+ return DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Result);
+}
+
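Because the saturation width is carried in the second operand, the promoted node clamps to the same range as the original, so the final TRUNCATE is lossless. Roughly, for an illegal i8 result with i32 legal (hypothetical pseudo-IR):

    ; fp_to_sint_sat f32 %x, width 8       -- i8 result type illegal
    ;   t0 = fp_to_sint_sat f32 %x to i32, width 8  ; still clamps to [-128, 127]
    ;   t1 = trunc t0 to i8                         ; exact: t0 already fits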
/// Legalize a BITREVERSE scalar/vector operation as a series of mask + shifts.
SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) {
EVT VT = Op.getValueType();
@@ -2773,6 +2898,28 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, const SDLoc &dl) {
}
}
+/// Open code the operations for PARITY of the specified value.
+SDValue SelectionDAGLegalize::ExpandPARITY(SDValue Op, const SDLoc &dl) {
+ EVT VT = Op.getValueType();
+ EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+ unsigned Sz = VT.getScalarSizeInBits();
+
+ // If CTPOP is legal, use it. Otherwise use shifts and xor.
+ SDValue Result;
+ if (TLI.isOperationLegal(ISD::CTPOP, VT)) {
+ Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);
+ } else {
+ Result = Op;
+ for (unsigned i = Log2_32_Ceil(Sz); i != 0;) {
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, VT, Result,
+ DAG.getConstant(1ULL << (--i), dl, ShVT));
+ Result = DAG.getNode(ISD::XOR, dl, VT, Result, Shift);
+ }
+ }
+
+ return DAG.getNode(ISD::AND, dl, VT, Result, DAG.getConstant(1, dl, VT));
+}
+
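The shift/XOR fallback folds the word onto itself until every bit's parity accumulates in bit 0. Written out for a 32-bit scalar, a sketch of the loop above with Sz = 32 (so shifts of 16, 8, 4, 2, 1):

    #include <cstdint>

    uint32_t parity32(uint32_t X) {
      X ^= X >> 16;
      X ^= X >> 8;
      X ^= X >> 4;
      X ^= X >> 2;
      X ^= X >> 1;
      return X & 1;   // the final AND with 1
    }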
bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
LLVM_DEBUG(dbgs() << "Trying to expand node\n");
SmallVector<SDValue, 8> Results;
@@ -2804,6 +2951,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::BSWAP:
Results.push_back(ExpandBSWAP(Node->getOperand(0), dl));
break;
+ case ISD::PARITY:
+ Results.push_back(ExpandPARITY(Node->getOperand(0), dl));
+ break;
case ISD::FRAMEADDR:
case ISD::RETURNADDR:
case ISD::FRAME_TO_ARGS_OFFSET:
@@ -2948,18 +3098,19 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
// We fall back to use stack operation when the FP_ROUND operation
// isn't available.
- Tmp1 = EmitStackConvert(Node->getOperand(1),
- Node->getValueType(0),
- Node->getValueType(0), dl, Node->getOperand(0));
- ReplaceNode(Node, Tmp1.getNode());
- LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_ROUND node\n");
- return true;
+ if ((Tmp1 = EmitStackConvert(Node->getOperand(1), Node->getValueType(0),
+ Node->getValueType(0), dl,
+ Node->getOperand(0)))) {
+ ReplaceNode(Node, Tmp1.getNode());
+ LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_ROUND node\n");
+ return true;
+ }
+ break;
case ISD::FP_ROUND:
case ISD::BITCAST:
- Tmp1 = EmitStackConvert(Node->getOperand(0),
- Node->getValueType(0),
- Node->getValueType(0), dl);
- Results.push_back(Tmp1);
+ if ((Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0),
+ Node->getValueType(0), dl)))
+ Results.push_back(Tmp1);
break;
case ISD::STRICT_FP_EXTEND:
// When strict mode is enforced we can't do expansion because it
@@ -2974,17 +3125,19 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
// We fall back to use stack operation when the FP_EXTEND operation
// isn't available.
- Tmp1 = EmitStackConvert(Node->getOperand(1),
- Node->getOperand(1).getValueType(),
- Node->getValueType(0), dl, Node->getOperand(0));
- ReplaceNode(Node, Tmp1.getNode());
- LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_EXTEND node\n");
- return true;
+ if ((Tmp1 = EmitStackConvert(
+ Node->getOperand(1), Node->getOperand(1).getValueType(),
+ Node->getValueType(0), dl, Node->getOperand(0)))) {
+ ReplaceNode(Node, Tmp1.getNode());
+ LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_EXTEND node\n");
+ return true;
+ }
+ break;
case ISD::FP_EXTEND:
- Tmp1 = EmitStackConvert(Node->getOperand(0),
- Node->getOperand(0).getValueType(),
- Node->getValueType(0), dl);
- Results.push_back(Tmp1);
+ if ((Tmp1 = EmitStackConvert(Node->getOperand(0),
+ Node->getOperand(0).getValueType(),
+ Node->getValueType(0), dl)))
+ Results.push_back(Tmp1);
break;
case ISD::SIGN_EXTEND_INREG: {
EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
@@ -3029,10 +3182,11 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
LLVM_FALLTHROUGH;
case ISD::SINT_TO_FP:
case ISD::STRICT_SINT_TO_FP:
- Tmp1 = ExpandLegalINT_TO_FP(Node, Tmp2);
- Results.push_back(Tmp1);
- if (Node->isStrictFPOpcode())
- Results.push_back(Tmp2);
+ if ((Tmp1 = ExpandLegalINT_TO_FP(Node, Tmp2))) {
+ Results.push_back(Tmp1);
+ if (Node->isStrictFPOpcode())
+ Results.push_back(Tmp2);
+ }
break;
case ISD::FP_TO_SINT:
if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG))
@@ -3059,6 +3213,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
return true;
}
break;
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ Results.push_back(TLI.expandFP_TO_INT_SAT(Node, DAG));
+ break;
case ISD::VAARG:
Results.push_back(DAG.expandVAArg(Node));
Results.push_back(Results[0].getValue(1));
@@ -3187,7 +3345,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::STACKSAVE:
// Expand to CopyFromReg if the target set
// StackPointerRegisterToSaveRestore.
- if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) {
+ if (Register SP = TLI.getStackPointerRegisterToSaveRestore()) {
Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, SP,
Node->getValueType(0)));
Results.push_back(Results[0].getValue(1));
@@ -3199,7 +3357,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::STACKRESTORE:
// Expand to CopyToReg if the target set
// StackPointerRegisterToSaveRestore.
- if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) {
+ if (Register SP = TLI.getStackPointerRegisterToSaveRestore()) {
Results.push_back(DAG.getCopyToReg(Node->getOperand(0), dl, SP,
Node->getOperand(1)));
} else {
@@ -3214,12 +3372,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(ExpandFCOPYSIGN(Node));
break;
case ISD::FNEG:
- // Expand Y = FNEG(X) -> Y = SUB -0.0, X
- Tmp1 = DAG.getConstantFP(-0.0, dl, Node->getValueType(0));
- // TODO: If FNEG has fast-math-flags, propagate them to the FSUB.
- Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1,
- Node->getOperand(0));
- Results.push_back(Tmp1);
+ Results.push_back(ExpandFNEG(Node));
break;
case ISD::FABS:
Results.push_back(ExpandFABS(Node));
@@ -3315,7 +3468,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// Check to see if this FP immediate is already legal.
// If this is a legal constant, turn it into a TargetConstantFP node.
if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0),
- DAG.getMachineFunction().getFunction().hasOptSize()))
+ DAG.shouldOptForSize()))
Results.push_back(ExpandConstantFP(CFP, true));
break;
}
@@ -3394,7 +3547,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SmallVector<SDValue, 4> Halves;
EVT HalfType = EVT(VT).getHalfSizedIntegerVT(*DAG.getContext());
assert(TLI.isTypeLegal(HalfType));
- if (TLI.expandMUL_LOHI(Node->getOpcode(), VT, Node, LHS, RHS, Halves,
+ if (TLI.expandMUL_LOHI(Node->getOpcode(), VT, dl, LHS, RHS, Halves,
HalfType, DAG,
TargetLowering::MulExpansionKind::Always)) {
for (unsigned i = 0; i < 2; ++i) {
@@ -3463,7 +3616,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
case ISD::ROTL:
case ISD::ROTR:
- if (TLI.expandROT(Node, Tmp1, DAG))
+ if (TLI.expandROT(Node, true /*AllowVectorOps*/, Tmp1, DAG))
Results.push_back(Tmp1);
break;
case ISD::SADDSAT:
@@ -3472,6 +3625,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::USUBSAT:
Results.push_back(TLI.expandAddSubSat(Node, DAG));
break;
+ case ISD::SSHLSAT:
+ case ISD::USHLSAT:
+ Results.push_back(TLI.expandShlSat(Node, DAG));
+ break;
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
@@ -3809,16 +3966,16 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
(void)Legalized;
assert(Legalized && "Can't legalize BR_CC with legal condition!");
- assert(!NeedInvert && "Don't know how to invert BR_CC!");
-
// If we expanded the SETCC by swapping LHS and RHS, create a new BR_CC
// node.
if (Tmp4.getNode()) {
+ assert(!NeedInvert && "Don't know how to invert BR_CC!");
+
Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1,
Tmp4, Tmp2, Tmp3, Node->getOperand(4));
} else {
Tmp3 = DAG.getConstant(0, dl, Tmp2.getValueType());
- Tmp4 = DAG.getCondCode(ISD::SETNE);
+ Tmp4 = DAG.getCondCode(NeedInvert ? ISD::SETEQ : ISD::SETNE);
Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4,
Tmp2, Tmp3, Node->getOperand(4));
}
@@ -3899,6 +4056,27 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
== TargetLowering::Legal)
return true;
break;
+ case ISD::STRICT_FSUB: {
+ if (TLI.getStrictFPOperationAction(
+ ISD::STRICT_FSUB, Node->getValueType(0)) == TargetLowering::Legal)
+ return true;
+ if (TLI.getStrictFPOperationAction(
+ ISD::STRICT_FADD, Node->getValueType(0)) != TargetLowering::Legal)
+ break;
+
+ EVT VT = Node->getValueType(0);
+ const SDNodeFlags Flags = Node->getFlags();
+ SDValue Neg = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(2), Flags);
+ SDValue Fadd = DAG.getNode(ISD::STRICT_FADD, dl, Node->getVTList(),
+ {Node->getOperand(0), Node->getOperand(1), Neg},
+ Flags);
+
+ Results.push_back(Fadd);
+ Results.push_back(Fadd.getValue(1));
+ break;
+ }
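The rewrite is exact because IEEE negation only flips the sign bit and can never raise an exception, so a - b == a + (-b) holds for every input, including NaNs and infinities; only the STRICT_FADD needs to carry the chain. A scalar sketch of the identity:

    // For all IEEE values A, B: A - B == A + (-B); FNEG is exception-free.
    double strict_fsub(double A, double B) { return A + (-B); }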
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
case ISD::STRICT_LRINT:
case ISD::STRICT_LLRINT:
case ISD::STRICT_LROUND:
@@ -3967,12 +4145,23 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
case ISD::ATOMIC_LOAD_UMAX:
case ISD::ATOMIC_CMP_SWAP: {
MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
- RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!");
-
+ AtomicOrdering Order = cast<AtomicSDNode>(Node)->getOrdering();
+ RTLIB::Libcall LC = RTLIB::getOUTLINE_ATOMIC(Opc, Order, VT);
EVT RetVT = Node->getValueType(0);
- SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end());
TargetLowering::MakeLibCallOptions CallOptions;
+ SmallVector<SDValue, 4> Ops;
+ if (TLI.getLibcallName(LC)) {
+      // If an outline atomic helper is available, prepare its arguments
+      // (value operands first, address last) and expand.
+      Ops.append(Node->op_begin() + 2, Node->op_end());
+      Ops.push_back(Node->getOperand(1));
+ } else {
+ LC = RTLIB::getSYNC(Opc, VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+ "Unexpected atomic op or value type!");
+      // Otherwise, prepare the arguments for expansion to a __sync libcall.
+ Ops.append(Node->op_begin() + 1, Node->op_end());
+ }
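The two operand orders reflect the two libcall families' calling conventions. Sketching the assumed prototypes (AArch64 outline-atomic names shown for illustration only):

    #include <cstdint>

    // Outline atomics take the value operands first and the address last...
    extern "C" uint32_t __aarch64_ldadd4_relax(uint32_t Val, uint32_t *Ptr);
    // ...while the legacy __sync_* routines take the address first.
    extern "C" uint32_t __sync_fetch_and_add_4(volatile uint32_t *Ptr,
                                               uint32_t Val);

Hence operand 1 (the address) is appended after the value operands in the outline-atomic case.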
std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT,
Ops, CallOptions,
SDLoc(Node),
@@ -4220,11 +4409,131 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
Results.push_back(ExpandLibCall(LC, Node, false));
break;
}
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: {
+ // TODO - Common the code with DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP
+ bool IsStrict = Node->isStrictFPOpcode();
+ bool Signed = Node->getOpcode() == ISD::SINT_TO_FP ||
+ Node->getOpcode() == ISD::STRICT_SINT_TO_FP;
+ EVT SVT = Node->getOperand(IsStrict ? 1 : 0).getValueType();
+ EVT RVT = Node->getValueType(0);
+ EVT NVT = EVT();
+ SDLoc dl(Node);
+
+    // Even if the input is legal, no libcall may exactly match; e.g. we don't
+    // have i1 -> fp conversions. So the operand needs to be promoted to a
+    // larger type, e.g. i13 -> fp. Then, look for an appropriate libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ for (unsigned t = MVT::FIRST_INTEGER_VALUETYPE;
+ t <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL;
+ ++t) {
+ NVT = (MVT::SimpleValueType)t;
+      // The source type needs to be big enough to hold the operand.
+ if (NVT.bitsGE(SVT))
+ LC = Signed ? RTLIB::getSINTTOFP(NVT, RVT)
+ : RTLIB::getUINTTOFP(NVT, RVT);
+ }
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to legalize as libcall");
+
+ SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue();
+ // Sign/zero extend the argument if the libcall takes a larger type.
+ SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+ NVT, Node->getOperand(IsStrict ? 1 : 0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(Signed);
+ std::pair<SDValue, SDValue> Tmp =
+ TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, dl, Chain);
+ Results.push_back(Tmp.first);
+ if (IsStrict)
+ Results.push_back(Tmp.second);
+ break;
+ }
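So, for example, an illegal sitofp i13 -> f32 is promoted to the first integer type with a matching libcall (i32) and lowered roughly as follows, assuming the usual compiler-rt/libgcc routine name:

    #include <cstdint>

    extern "C" float __floatsisf(int32_t);   // i32 -> f32; assumed name

    float lower_sitofp_i13(int16_t X13) {    // i13 carried in a wider scalar
      int32_t Wide = (int32_t)X13;           // the SIGN_EXTEND step
      return __floatsisf(Wide);              // the selected libcall
    }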
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT: {
+ // TODO - Common the code with DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT.
+ bool IsStrict = Node->isStrictFPOpcode();
+ bool Signed = Node->getOpcode() == ISD::FP_TO_SINT ||
+ Node->getOpcode() == ISD::STRICT_FP_TO_SINT;
+
+ SDValue Op = Node->getOperand(IsStrict ? 1 : 0);
+ EVT SVT = Op.getValueType();
+ EVT RVT = Node->getValueType(0);
+ EVT NVT = EVT();
+ SDLoc dl(Node);
+
+    // Even if the result is legal, no libcall may exactly match; e.g. we don't
+    // have fp -> i1 conversions. So the result needs to be promoted to a
+    // larger type, e.g. fp -> i32. Then, look for an appropriate libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ for (unsigned IntVT = MVT::FIRST_INTEGER_VALUETYPE;
+ IntVT <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL;
+ ++IntVT) {
+ NVT = (MVT::SimpleValueType)IntVT;
+      // The type needs to be big enough to hold the result.
+ if (NVT.bitsGE(RVT))
+ LC = Signed ? RTLIB::getFPTOSINT(SVT, NVT)
+ : RTLIB::getFPTOUINT(SVT, NVT);
+ }
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to legalize as libcall");
+
+ SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue();
+ TargetLowering::MakeLibCallOptions CallOptions;
+ std::pair<SDValue, SDValue> Tmp =
+ TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, dl, Chain);
+
+ // Truncate the result if the libcall returns a larger type.
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, RVT, Tmp.first));
+ if (IsStrict)
+ Results.push_back(Tmp.second);
+ break;
+ }
+
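The opposite direction mirrors this: an fp -> i8 conversion calls the first libcall with a wide-enough result and truncates afterwards, which is safe because in-range results fit in the low bits and out-of-range inputs are undefined for FP_TO_SINT anyway. Roughly, with an assumed routine name:

    #include <cstdint>

    extern "C" int32_t __fixsfsi(float);     // f32 -> i32; assumed name

    int8_t lower_fptosi_i8(float F) {
      int32_t Wide = __fixsfsi(F);           // widened libcall
      return (int8_t)Wide;                   // the ISD::TRUNCATE step
    }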
+ case ISD::FP_ROUND:
+ case ISD::STRICT_FP_ROUND: {
+ // X = FP_ROUND(Y, TRUNC)
+ // TRUNC is a flag, which is always an integer that is zero or one.
+    // If TRUNC is 0, this is a normal rounding; if it is 1, this FP_ROUND
+    // is known not to change the value of Y.
+    // We can only expand it into a libcall if TRUNC is 0.
+ bool IsStrict = Node->isStrictFPOpcode();
+ SDValue Op = Node->getOperand(IsStrict ? 1 : 0);
+ SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue();
+ EVT VT = Node->getValueType(0);
+ assert(cast<ConstantSDNode>(Node->getOperand(IsStrict ? 2 : 1))
+ ->isNullValue() &&
+ "Unable to expand as libcall if it is not normal rounding");
+
+ RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to legalize as libcall");
+
+ TargetLowering::MakeLibCallOptions CallOptions;
+ std::pair<SDValue, SDValue> Tmp =
+ TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, SDLoc(Node), Chain);
+ Results.push_back(Tmp.first);
+ if (IsStrict)
+ Results.push_back(Tmp.second);
+ break;
+ }
+ case ISD::FP_EXTEND: {
+ Results.push_back(
+ ExpandLibCall(RTLIB::getFPEXT(Node->getOperand(0).getValueType(),
+ Node->getValueType(0)),
+ Node, false));
+ break;
+ }
+ case ISD::STRICT_FP_EXTEND:
case ISD::STRICT_FP_TO_FP16: {
RTLIB::Libcall LC =
- RTLIB::getFPROUND(Node->getOperand(1).getValueType(), MVT::f16);
- assert(LC != RTLIB::UNKNOWN_LIBCALL &&
- "Unable to expand strict_fp_to_fp16");
+ Node->getOpcode() == ISD::STRICT_FP_TO_FP16
+ ? RTLIB::getFPROUND(Node->getOperand(1).getValueType(), MVT::f16)
+ : RTLIB::getFPEXT(Node->getOperand(1).getValueType(),
+ Node->getValueType(0));
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to legalize as libcall");
+
TargetLowering::MakeLibCallOptions CallOptions;
std::pair<SDValue, SDValue> Tmp =
TLI.makeLibCall(DAG, LC, Node->getValueType(0), Node->getOperand(1),
@@ -4321,7 +4630,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
OVT = Node->getOperand(0).getSimpleValueType();
}
if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP ||
- Node->getOpcode() == ISD::STRICT_SINT_TO_FP)
+ Node->getOpcode() == ISD::STRICT_SINT_TO_FP ||
+ Node->getOpcode() == ISD::STRICT_FSETCC ||
+ Node->getOpcode() == ISD::STRICT_FSETCCS)
OVT = Node->getOperand(1).getSimpleValueType();
if (Node->getOpcode() == ISD::BR_CC)
OVT = Node->getOperand(2).getSimpleValueType();
@@ -4381,6 +4692,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::STRICT_FP_TO_SINT:
PromoteLegalFP_TO_INT(Node, dl, Results);
break;
+ case ISD::FP_TO_UINT_SAT:
+ case ISD::FP_TO_SINT_SAT:
+ Results.push_back(PromoteLegalFP_TO_INT_SAT(Node, dl));
+ break;
case ISD::UINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
case ISD::SINT_TO_FP:
@@ -4515,13 +4830,29 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Results.push_back(Tmp1);
break;
}
- case ISD::SETCC: {
+ case ISD::SETCC:
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS: {
unsigned ExtOp = ISD::FP_EXTEND;
if (NVT.isInteger()) {
- ISD::CondCode CCCode =
- cast<CondCodeSDNode>(Node->getOperand(2))->get();
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
}
+ if (Node->isStrictFPOpcode()) {
+ SDValue InChain = Node->getOperand(0);
+ std::tie(Tmp1, std::ignore) =
+ DAG.getStrictFPExtendOrRound(Node->getOperand(1), InChain, dl, NVT);
+ std::tie(Tmp2, std::ignore) =
+ DAG.getStrictFPExtendOrRound(Node->getOperand(2), InChain, dl, NVT);
+ SmallVector<SDValue, 2> TmpChains = {Tmp1.getValue(1), Tmp2.getValue(1)};
+ SDValue OutChain = DAG.getTokenFactor(dl, TmpChains);
+ SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other);
+ Results.push_back(DAG.getNode(Node->getOpcode(), dl, VTs,
+ {OutChain, Tmp1, Tmp2, Node->getOperand(3)},
+ Node->getFlags()));
+ Results.push_back(Results.back().getValue(1));
+ break;
+ }
Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), Tmp1,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 7e8ad28f9b14..966645e3256d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -134,6 +134,16 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break;
case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break;
case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break;
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAX:
+ R = SoftenFloatRes_VECREDUCE(N);
+ break;
+ case ISD::VECREDUCE_SEQ_FADD:
+ case ISD::VECREDUCE_SEQ_FMUL:
+ R = SoftenFloatRes_VECREDUCE_SEQ(N);
+ break;
}
// If R is null, the sub-method took care of registering the result.
@@ -772,6 +782,16 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
return Tmp.first;
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_VECREDUCE(SDNode *N) {
+ // Expand and soften recursively.
+ ReplaceValueWith(SDValue(N, 0), TLI.expandVecReduce(N, DAG));
+ return SDValue();
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_VECREDUCE_SEQ(SDNode *N) {
+ ReplaceValueWith(SDValue(N, 0), TLI.expandVecReduceSeq(N, DAG));
+ return SDValue();
+}
//===----------------------------------------------------------------------===//
// Convert Float Operand to Integer
@@ -799,6 +819,9 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break;
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ Res = SoftenFloatOp_FP_TO_XINT_SAT(N); break;
case ISD::STRICT_LROUND:
case ISD::LROUND: Res = SoftenFloatOp_LROUND(N); break;
case ISD::STRICT_LLROUND:
@@ -890,6 +913,24 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
0);
}
+// Even if the result type is legal, no libcall may exactly match; e.g. we
+// don't have fp -> i8 conversions. This helper method looks for an
+// appropriate promoted libcall.
+static RTLIB::Libcall findFPToIntLibcall(EVT SrcVT, EVT RetVT, EVT &Promoted,
+ bool Signed) {
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ for (unsigned IntVT = MVT::FIRST_INTEGER_VALUETYPE;
+ IntVT <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL;
+ ++IntVT) {
+ Promoted = (MVT::SimpleValueType)IntVT;
+    // The type needs to be big enough to hold the result.
+ if (Promoted.bitsGE(RetVT))
+ LC = Signed ? RTLIB::getFPTOSINT(SrcVT, Promoted)
+ : RTLIB::getFPTOUINT(SrcVT, Promoted);
+ }
+ return LC;
+}
+
SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) {
bool IsStrict = N->isStrictFPOpcode();
bool Signed = N->getOpcode() == ISD::FP_TO_SINT ||
@@ -905,16 +946,9 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) {
// a larger type, eg: fp -> i32. Even if it is legal, no libcall may exactly
// match, eg. we don't have fp -> i8 conversions.
// Look for an appropriate libcall.
- RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
- for (unsigned IntVT = MVT::FIRST_INTEGER_VALUETYPE;
- IntVT <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL;
- ++IntVT) {
- NVT = (MVT::SimpleValueType)IntVT;
- // The type needs to big enough to hold the result.
- if (NVT.bitsGE(RVT))
- LC = Signed ? RTLIB::getFPTOSINT(SVT, NVT) : RTLIB::getFPTOUINT(SVT, NVT);
- }
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_XINT!");
+ RTLIB::Libcall LC = findFPToIntLibcall(SVT, RVT, NVT, Signed);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && NVT.isSimple() &&
+ "Unsupported FP_TO_XINT!");
Op = GetSoftenedFloat(Op);
SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
@@ -934,6 +968,11 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) {
return SDValue();
}
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT_SAT(SDNode *N) {
+ SDValue Res = TLI.expandFP_TO_INT_SAT(N, DAG);
+ return Res;
+}
+
SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
@@ -1200,6 +1239,8 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FTRUNC:
case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break;
case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break;
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break;
case ISD::STRICT_FREM:
@@ -1272,7 +1313,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo,
void DAGTypeLegalizer::ExpandFloatRes_FMINNUM(SDNode *N, SDValue &Lo,
SDValue &Hi) {
- ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
RTLIB::FMIN_F32, RTLIB::FMIN_F64,
RTLIB::FMIN_F80, RTLIB::FMIN_F128,
RTLIB::FMIN_PPCF128), Lo, Hi);
@@ -1598,21 +1639,31 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
assert(N->getValueType(0) == MVT::ppcf128 && "Unsupported XINT_TO_FP!");
EVT VT = N->getValueType(0);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
- SDValue Src = N->getOperand(0);
+ bool Strict = N->isStrictFPOpcode();
+ SDValue Src = N->getOperand(Strict ? 1 : 0);
EVT SrcVT = Src.getValueType();
- bool isSigned = N->getOpcode() == ISD::SINT_TO_FP;
+ bool isSigned = N->getOpcode() == ISD::SINT_TO_FP ||
+ N->getOpcode() == ISD::STRICT_SINT_TO_FP;
SDLoc dl(N);
+ SDValue Chain = Strict ? N->getOperand(0) : DAG.getEntryNode();
+
+ // TODO: Any other flags to propagate?
+ SDNodeFlags Flags;
+ Flags.setNoFPExcept(N->getFlags().hasNoFPExcept());
// First do an SINT_TO_FP, whether the original was signed or unsigned.
// When promoting partial word types to i32 we must honor the signedness,
// though.
if (SrcVT.bitsLE(MVT::i32)) {
// The integer can be represented exactly in an f64.
- Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
- MVT::i32, Src);
Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
APInt(NVT.getSizeInBits(), 0)), dl, NVT);
- Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src);
+ if (Strict) {
+ Hi = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(NVT, MVT::Other),
+ {Chain, Src}, Flags);
+ Chain = Hi.getValue(1);
+ } else
+ Hi = DAG.getNode(N->getOpcode(), dl, NVT, Src);
} else {
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (SrcVT.bitsLE(MVT::i64)) {
@@ -1627,14 +1678,25 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true);
- Hi = TLI.makeLibCall(DAG, LC, VT, Src, CallOptions, dl).first;
- GetPairElements(Hi, Lo, Hi);
+ std::pair<SDValue, SDValue> Tmp =
+ TLI.makeLibCall(DAG, LC, VT, Src, CallOptions, dl, Chain);
+ if (Strict)
+ Chain = Tmp.second;
+ GetPairElements(Tmp.first, Lo, Hi);
}
- if (isSigned)
+ // No need to complement for unsigned 32-bit integers
+ if (isSigned || SrcVT.bitsLE(MVT::i32)) {
+ if (Strict)
+ ReplaceValueWith(SDValue(N, 1), Chain);
+
return;
+ }
// Unsigned - fix up the SINT_TO_FP value just calculated.
+  // FIXME: For unsigned i128 to ppc_fp128 conversion, we need to take care
+  // to preserve semantic correctness if the integer is not exactly
+  // representable here. See ExpandLegalINT_TO_FP.
Hi = DAG.getNode(ISD::BUILD_PAIR, dl, VT, Lo, Hi);
SrcVT = Src.getValueType();
@@ -1658,11 +1720,16 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
break;
}
- // TODO: Are there fast-math-flags to propagate to this FADD?
- Lo = DAG.getNode(ISD::FADD, dl, VT, Hi,
- DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble(),
- APInt(128, Parts)),
- dl, MVT::ppcf128));
+ // TODO: Are there other fast-math-flags to propagate to this FADD?
+ SDValue NewLo = DAG.getConstantFP(
+ APFloat(APFloat::PPCDoubleDouble(), APInt(128, Parts)), dl, MVT::ppcf128);
+ if (Strict) {
+ Lo = DAG.getNode(ISD::STRICT_FADD, dl, DAG.getVTList(VT, MVT::Other),
+ {Chain, Hi, NewLo}, Flags);
+ Chain = Lo.getValue(1);
+ ReplaceValueWith(SDValue(N, 1), Chain);
+ } else
+ Lo = DAG.getNode(ISD::FADD, dl, VT, Hi, NewLo);
Lo = DAG.getSelectCC(dl, Src, DAG.getConstant(0, dl, SrcVT),
Lo, Hi, ISD::SETLT);
GetPairElements(Lo, Lo, Hi);
@@ -1702,14 +1769,16 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break;
case ISD::STRICT_FP_TO_SINT:
- case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break;
case ISD::STRICT_FP_TO_UINT:
- case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_XINT(N); break;
case ISD::LROUND: Res = ExpandFloatOp_LROUND(N); break;
case ISD::LLROUND: Res = ExpandFloatOp_LLROUND(N); break;
case ISD::LRINT: Res = ExpandFloatOp_LRINT(N); break;
case ISD::LLRINT: Res = ExpandFloatOp_LLRINT(N); break;
case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break;
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS:
case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break;
case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N),
OpNo); break;
@@ -1735,7 +1804,8 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS,
SDValue &NewRHS,
ISD::CondCode &CCCode,
- const SDLoc &dl) {
+ const SDLoc &dl, SDValue &Chain,
+ bool IsSignaling) {
SDValue LHSLo, LHSHi, RHSLo, RHSHi;
GetExpandedFloat(NewLHS, LHSLo, LHSHi);
GetExpandedFloat(NewRHS, RHSLo, RHSHi);
@@ -1747,25 +1817,32 @@ void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS,
// BNE crN, L:
// FCMPU crN, lo1, lo2
// The following can be improved, but not that much.
- SDValue Tmp1, Tmp2, Tmp3;
- Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()),
- LHSHi, RHSHi, ISD::SETOEQ);
- Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()),
- LHSLo, RHSLo, CCCode);
+ SDValue Tmp1, Tmp2, Tmp3, OutputChain;
+ Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi,
+ RHSHi, ISD::SETOEQ, Chain, IsSignaling);
+ OutputChain = Tmp1->getNumValues() > 1 ? Tmp1.getValue(1) : SDValue();
+ Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), LHSLo,
+ RHSLo, CCCode, OutputChain, IsSignaling);
+ OutputChain = Tmp2->getNumValues() > 1 ? Tmp2.getValue(1) : SDValue();
Tmp3 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2);
- Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()),
- LHSHi, RHSHi, ISD::SETUNE);
- Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()),
- LHSHi, RHSHi, CCCode);
+ Tmp1 =
+ DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi,
+ ISD::SETUNE, OutputChain, IsSignaling);
+ OutputChain = Tmp1->getNumValues() > 1 ? Tmp1.getValue(1) : SDValue();
+ Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi,
+ RHSHi, CCCode, OutputChain, IsSignaling);
+ OutputChain = Tmp2->getNumValues() > 1 ? Tmp2.getValue(1) : SDValue();
Tmp1 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2);
NewLHS = DAG.getNode(ISD::OR, dl, Tmp1.getValueType(), Tmp1, Tmp3);
NewRHS = SDValue(); // LHS is the result, not a compare.
+ Chain = OutputChain;
}
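Flattened into a scalar predicate, the expansion just computed is, as a sketch (CC stands for any f64 comparison):

    // Double-double compare: the high halves decide unless they are equal,
    // in which case the low halves decide. Note that !(Hi1 == Hi2) is the
    // unordered-or-unequal test (SETUNE), exactly as in the DAG above.
    template <class Cmp>
    bool cmpPPCF128(double Hi1, double Lo1, double Hi2, double Lo2, Cmp CC) {
      bool HiEq = (Hi1 == Hi2);               // SETOEQ
      return (HiEq && CC(Lo1, Lo2)) || (!HiEq && CC(Hi1, Hi2));
    }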
SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) {
SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
- FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N));
+ SDValue Chain;
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N), Chain);
// If ExpandSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
@@ -1820,38 +1897,23 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) {
return SDValue();
}
-SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) {
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_XINT(SDNode *N) {
EVT RVT = N->getValueType(0);
SDLoc dl(N);
bool IsStrict = N->isStrictFPOpcode();
+ bool Signed = N->getOpcode() == ISD::FP_TO_SINT ||
+ N->getOpcode() == ISD::STRICT_FP_TO_SINT;
SDValue Op = N->getOperand(IsStrict ? 1 : 0);
SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
- RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), RVT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
- TargetLowering::MakeLibCallOptions CallOptions;
- std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RVT, Op,
- CallOptions, dl, Chain);
- if (!IsStrict)
- return Tmp.first;
-
- ReplaceValueWith(SDValue(N, 1), Tmp.second);
- ReplaceValueWith(SDValue(N, 0), Tmp.first);
- return SDValue();
-}
-SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
- EVT RVT = N->getValueType(0);
- SDLoc dl(N);
-
- bool IsStrict = N->isStrictFPOpcode();
- SDValue Op = N->getOperand(IsStrict ? 1 : 0);
- SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
- RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), RVT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
+ EVT NVT;
+ RTLIB::Libcall LC = findFPToIntLibcall(Op.getValueType(), RVT, NVT, Signed);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && NVT.isSimple() &&
+ "Unsupported FP_TO_XINT!");
TargetLowering::MakeLibCallOptions CallOptions;
- std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RVT, Op,
- CallOptions, dl, Chain);
+ std::pair<SDValue, SDValue> Tmp =
+ TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, dl, Chain);
if (!IsStrict)
return Tmp.first;
@@ -1863,7 +1925,8 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) {
SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
- FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N));
+ SDValue Chain;
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N), Chain);
// If ExpandSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
@@ -1879,20 +1942,25 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) {
}
SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) {
- SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
- ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
- FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N));
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue NewLHS = N->getOperand(IsStrict ? 1 : 0);
+ SDValue NewRHS = N->getOperand(IsStrict ? 2 : 1);
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ ISD::CondCode CCCode =
+ cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get();
+ FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N), Chain,
+ N->getOpcode() == ISD::STRICT_FSETCCS);
- // If ExpandSetCCOperands returned a scalar, use it.
- if (!NewRHS.getNode()) {
- assert(NewLHS.getValueType() == N->getValueType(0) &&
- "Unexpected setcc expansion!");
- return NewLHS;
+  // FloatExpandSetCCOperands always returns a scalar.
+ assert(!NewRHS.getNode() && "Expect to return scalar");
+ assert(NewLHS.getValueType() == N->getValueType(0) &&
+ "Unexpected setcc expansion!");
+ if (Chain) {
+ ReplaceValueWith(SDValue(N, 0), NewLHS);
+ ReplaceValueWith(SDValue(N, 1), Chain);
+ return SDValue();
}
-
- // Otherwise, update N to have the operands specified.
- return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
- DAG.getCondCode(CCCode)), 0);
+ return NewLHS;
}
SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
@@ -2013,6 +2081,9 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::FCOPYSIGN: R = PromoteFloatOp_FCOPYSIGN(N, OpNo); break;
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: R = PromoteFloatOp_FP_TO_XINT(N, OpNo); break;
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ R = PromoteFloatOp_FP_TO_XINT_SAT(N, OpNo); break;
case ISD::FP_EXTEND: R = PromoteFloatOp_FP_EXTEND(N, OpNo); break;
case ISD::SELECT_CC: R = PromoteFloatOp_SELECT_CC(N, OpNo); break;
case ISD::SETCC: R = PromoteFloatOp_SETCC(N, OpNo); break;
@@ -2056,6 +2127,13 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo) {
return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), Op);
}
+SDValue DAGTypeLegalizer::PromoteFloatOp_FP_TO_XINT_SAT(SDNode *N,
+ unsigned OpNo) {
+ SDValue Op = GetPromotedFloat(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), Op,
+ N->getOperand(1));
+}
+
SDValue DAGTypeLegalizer::PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo) {
SDValue Op = GetPromotedFloat(N->getOperand(0));
EVT VT = N->getValueType(0);
@@ -2191,6 +2269,16 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
case ISD::UINT_TO_FP: R = PromoteFloatRes_XINT_TO_FP(N); break;
case ISD::UNDEF: R = PromoteFloatRes_UNDEF(N); break;
case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAX:
+ R = PromoteFloatRes_VECREDUCE(N);
+ break;
+ case ISD::VECREDUCE_SEQ_FADD:
+ case ISD::VECREDUCE_SEQ_FMUL:
+ R = PromoteFloatRes_VECREDUCE_SEQ(N);
+ break;
}
if (R.getNode())
@@ -2422,6 +2510,20 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) {
N->getValueType(0)));
}
+SDValue DAGTypeLegalizer::PromoteFloatRes_VECREDUCE(SDNode *N) {
+ // Expand and promote recursively.
+  // TODO: This is non-optimal, but interacting with the vector legalization
+  // that happens concurrently is non-trivial. We could do something similar
+  // to PromoteFloatRes_EXTRACT_VECTOR_ELT here.
+ ReplaceValueWith(SDValue(N, 0), TLI.expandVecReduce(N, DAG));
+ return SDValue();
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatRes_VECREDUCE_SEQ(SDNode *N) {
+ ReplaceValueWith(SDValue(N, 0), TLI.expandVecReduceSeq(N, DAG));
+ return SDValue();
+}
+
SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) {
EVT VT = N->getValueType(0);
@@ -2530,6 +2632,16 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
case ISD::UINT_TO_FP: R = SoftPromoteHalfRes_XINT_TO_FP(N); break;
case ISD::UNDEF: R = SoftPromoteHalfRes_UNDEF(N); break;
case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAX:
+ R = SoftPromoteHalfRes_VECREDUCE(N);
+ break;
+ case ISD::VECREDUCE_SEQ_FADD:
+ case ISD::VECREDUCE_SEQ_FMUL:
+ R = SoftPromoteHalfRes_VECREDUCE_SEQ(N);
+ break;
}
if (R.getNode())
@@ -2722,6 +2834,18 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_BinOp(SDNode *N) {
return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res);
}
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_VECREDUCE(SDNode *N) {
+ // Expand and soften recursively.
+ ReplaceValueWith(SDValue(N, 0), TLI.expandVecReduce(N, DAG));
+ return SDValue();
+}
+
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_VECREDUCE_SEQ(SDNode *N) {
+ // Expand and soften.
+ ReplaceValueWith(SDValue(N, 0), TLI.expandVecReduceSeq(N, DAG));
+ return SDValue();
+}
+
//===----------------------------------------------------------------------===//
// Half Operand Soft Promotion
//===----------------------------------------------------------------------===//
@@ -2753,6 +2877,9 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) {
case ISD::FCOPYSIGN: Res = SoftPromoteHalfOp_FCOPYSIGN(N, OpNo); break;
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: Res = SoftPromoteHalfOp_FP_TO_XINT(N); break;
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ Res = SoftPromoteHalfOp_FP_TO_XINT_SAT(N); break;
case ISD::STRICT_FP_EXTEND:
case ISD::FP_EXTEND: Res = SoftPromoteHalfOp_FP_EXTEND(N); break;
case ISD::SELECT_CC: Res = SoftPromoteHalfOp_SELECT_CC(N, OpNo); break;
@@ -2822,6 +2949,20 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT(SDNode *N) {
return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Res);
}
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT_SAT(SDNode *N) {
+ SDValue Op = N->getOperand(0);
+ SDLoc dl(N);
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
+
+ Op = GetSoftPromotedHalf(Op);
+
+ SDValue Res = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op);
+
+ return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Res,
+ N->getOperand(1));
+}
+
SDValue DAGTypeLegalizer::SoftPromoteHalfOp_SELECT_CC(SDNode *N,
unsigned OpNo) {
assert(OpNo == 0 && "Can only soften the comparison values");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 74071f763dbf..4a686bc227de 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -62,7 +62,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::Constant: Res = PromoteIntRes_Constant(N); break;
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break;
- case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break;
+ case ISD::PARITY:
+ case ISD::CTPOP: Res = PromoteIntRes_CTPOP_PARITY(N); break;
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break;
case ISD::EXTRACT_VECTOR_ELT:
@@ -81,7 +82,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SMIN:
case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break;
case ISD::UMIN:
- case ISD::UMAX: Res = PromoteIntRes_ZExtIntBinOp(N); break;
+ case ISD::UMAX: Res = PromoteIntRes_UMINUMAX(N); break;
case ISD::SHL: Res = PromoteIntRes_SHL(N); break;
case ISD::SIGN_EXTEND_INREG:
@@ -122,6 +123,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break;
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ Res = PromoteIntRes_FP_TO_XINT_SAT(N); break;
+
case ISD::FP_TO_FP16: Res = PromoteIntRes_FP_TO_FP16(N); break;
case ISD::FLT_ROUNDS_: Res = PromoteIntRes_FLT_ROUNDS(N); break;
@@ -151,10 +156,15 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ADDCARRY:
case ISD::SUBCARRY: Res = PromoteIntRes_ADDSUBCARRY(N, ResNo); break;
+ case ISD::SADDO_CARRY:
+ case ISD::SSUBO_CARRY: Res = PromoteIntRes_SADDSUBO_CARRY(N, ResNo); break;
+
case ISD::SADDSAT:
case ISD::UADDSAT:
case ISD::SSUBSAT:
- case ISD::USUBSAT: Res = PromoteIntRes_ADDSUBSAT(N); break;
+ case ISD::USUBSAT:
+ case ISD::SSHLSAT:
+ case ISD::USHLSAT: Res = PromoteIntRes_ADDSUBSHLSAT(N); break;
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
@@ -205,6 +215,16 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::FREEZE:
Res = PromoteIntRes_FREEZE(N);
break;
+
+ case ISD::ROTL:
+ case ISD::ROTR:
+ Res = PromoteIntRes_Rotate(N);
+ break;
+
+ case ISD::FSHL:
+ case ISD::FSHR:
+ Res = PromoteIntRes_FunnelShift(N);
+ break;
}
// If the result is null then the sub-method took care of registering it.
@@ -491,10 +511,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
NVT));
}
-SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) {
- // Zero extend to the promoted type and do the count there.
+SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) {
+ // Zero extend to the promoted type and do the count or parity there.
SDValue Op = ZExtPromotedInteger(N->getOperand(0));
- return DAG.getNode(ISD::CTPOP, SDLoc(N), Op.getValueType(), Op);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op);
}
SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
@@ -559,8 +579,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
SDValue Res;
if (N->isStrictFPOpcode()) {
- Res = DAG.getNode(NewOpc, dl, { NVT, MVT::Other },
- { N->getOperand(0), N->getOperand(1) });
+ Res = DAG.getNode(NewOpc, dl, {NVT, MVT::Other},
+ {N->getOperand(0), N->getOperand(1)});
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -580,6 +600,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
DAG.getValueType(N->getValueType(0).getScalarType()));
}
+SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT_SAT(SDNode *N) {
+ // Promote the result type, while keeping the original width in Op1.
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDLoc dl(N);
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0),
+ N->getOperand(1));
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
@@ -663,12 +691,17 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) {
assert(NVT == ExtPassThru.getValueType() &&
"Gather result type and the passThru argument type should be the same");
+ ISD::LoadExtType ExtType = N->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD)
+ ExtType = ISD::EXTLOAD;
+
SDLoc dl(N);
SDValue Ops[] = {N->getChain(), ExtPassThru, N->getMask(), N->getBasePtr(),
N->getIndex(), N->getScale() };
SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other),
N->getMemoryVT(), dl, Ops,
- N->getMemOperand(), N->getIndexType());
+ N->getMemOperand(), N->getIndexType(),
+ ExtType);
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -700,11 +733,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
return DAG.getBoolExtOrTrunc(Res.getValue(1), dl, NVT, VT);
}
-SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
// If the promoted type is legal, we can convert this to:
// 1. ANY_EXTEND iN to iM
// 2. SHL by M-N
- // 3. [US][ADD|SUB]SAT
+ // 3. [US][ADD|SUB|SHL]SAT
// 4. L/ASHR by M-N
// Else it is more efficient to convert this to a min and a max
// operation in the higher precision arithmetic.
@@ -714,9 +747,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) {
unsigned OldBits = Op1.getScalarValueSizeInBits();
unsigned Opcode = N->getOpcode();
+ bool IsShift = Opcode == ISD::USHLSAT || Opcode == ISD::SSHLSAT;
SDValue Op1Promoted, Op2Promoted;
- if (Opcode == ISD::UADDSAT || Opcode == ISD::USUBSAT) {
+ if (IsShift) {
+ Op1Promoted = GetPromotedInteger(Op1);
+ Op2Promoted = ZExtPromotedInteger(Op2);
+ } else if (Opcode == ISD::UADDSAT || Opcode == ISD::USUBSAT) {
Op1Promoted = ZExtPromotedInteger(Op1);
Op2Promoted = ZExtPromotedInteger(Op2);
} else {
@@ -726,20 +763,24 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) {
EVT PromotedType = Op1Promoted.getValueType();
unsigned NewBits = PromotedType.getScalarSizeInBits();
- if (TLI.isOperationLegalOrCustom(Opcode, PromotedType)) {
+  // Shifts cannot use a min/max expansion; we can't detect overflow if all
+  // of the bits have been shifted out.
+ if (IsShift || TLI.isOperationLegalOrCustom(Opcode, PromotedType)) {
unsigned ShiftOp;
switch (Opcode) {
case ISD::SADDSAT:
case ISD::SSUBSAT:
+ case ISD::SSHLSAT:
ShiftOp = ISD::SRA;
break;
case ISD::UADDSAT:
case ISD::USUBSAT:
+ case ISD::USHLSAT:
ShiftOp = ISD::SRL;
break;
default:
llvm_unreachable("Expected opcode to be signed or unsigned saturation "
- "addition or subtraction");
+ "addition, subtraction or left shift");
}
unsigned SHLAmount = NewBits - OldBits;
@@ -747,8 +788,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) {
SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT);
Op1Promoted =
DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount);
- Op2Promoted =
- DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount);
+ if (!IsShift)
+ Op2Promoted =
+ DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount);
SDValue Result =
DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted);
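Left-justifying the operands keeps the saturation point where the narrow type needs it: for an i8 uaddsat promoted to i32, both inputs go to the top byte, the i32 UADDSAT clamps at exactly the old i8 limit, and the final shift brings the result back down. A scalar sketch:

    #include <cstdint>

    uint8_t uaddsat8_via_i32(uint8_t A, uint8_t B) {
      uint32_t PA = (uint32_t)A << 24;              // SHLAmount = 32 - 8
      uint32_t PB = (uint32_t)B << 24;
      uint64_t Sum = (uint64_t)PA + PB;             // model the i32 UADDSAT
      uint32_t Sat = Sum > 0xFFFFFFFFull ? 0xFFFFFFFFu : (uint32_t)Sum;
      return (uint8_t)(Sat >> 24);                  // SRL back down
    }

For the shift-saturate ops only the value operand is left-justified; the shift amount keeps its meaning and must not be rescaled, which is the !IsShift guard above.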
@@ -1076,6 +1118,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) {
LHS.getValueType(), LHS, RHS);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_UMINUMAX(SDNode *N) {
+  // It doesn't matter whether we sign extend or zero extend the inputs, so
+  // do whatever is best for the target.
+ SDValue LHS = SExtOrZExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtOrZExtPromotedInteger(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), SDLoc(N),
+ LHS.getValueType(), LHS, RHS);
+}
+
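Either extension works because both operands receive the same one: zero extension preserves unsigned order trivially, and sign extension also preserves it, mapping the high-bit-set values above all the others while keeping each group's internal order. A scalar sketch of the sign-extension case:

    #include <cstdint>

    // umin on i8 carried out in i32 after *sign* extension; still correct.
    uint8_t umin8_via_sext(uint8_t A, uint8_t B) {
      uint32_t WA = (uint32_t)(int32_t)(int8_t)A;   // SExtPromotedInteger
      uint32_t WB = (uint32_t)(int32_t)(int8_t)B;
      return (uint8_t)(WA < WB ? WA : WB);          // i32 UMIN, then truncate
    }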
SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
// The input value must be properly sign extended.
SDValue LHS = SExtPromotedInteger(N->getOperand(0));
@@ -1094,6 +1145,60 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
return DAG.getNode(ISD::SRL, SDLoc(N), LHS.getValueType(), LHS, RHS);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) {
+ // Lower the rotate to shifts and ORs which can be promoted.
+ SDValue Res;
+ TLI.expandROT(N, true /*AllowVectorOps*/, Res, DAG);
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) {
+ SDValue Hi = GetPromotedInteger(N->getOperand(0));
+ SDValue Lo = GetPromotedInteger(N->getOperand(1));
+ SDValue Amount = GetPromotedInteger(N->getOperand(2));
+
+ SDLoc DL(N);
+ EVT OldVT = N->getOperand(0).getValueType();
+ EVT VT = Lo.getValueType();
+ unsigned Opcode = N->getOpcode();
+ bool IsFSHR = Opcode == ISD::FSHR;
+ unsigned OldBits = OldVT.getScalarSizeInBits();
+ unsigned NewBits = VT.getScalarSizeInBits();
+
+ // Amount has to be interpreted modulo the old bit width.
+ Amount =
+ DAG.getNode(ISD::UREM, DL, VT, Amount, DAG.getConstant(OldBits, DL, VT));
+
+ // If the promoted type is twice the size (or more), then we use the
+ // traditional funnel 'double' shift codegen. This isn't necessary if the
+ // shift amount is constant.
+ // fshl(x,y,z) -> (((aext(x) << bw) | zext(y)) << (z % bw)) >> bw.
+ // fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z % bw)).
+ if (NewBits >= (2 * OldBits) && !isa<ConstantSDNode>(Amount) &&
+ !TLI.isOperationLegalOrCustom(Opcode, VT)) {
+ SDValue HiShift = DAG.getConstant(OldBits, DL, VT);
+ Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, HiShift);
+ Lo = DAG.getZeroExtendInReg(Lo, DL, OldVT);
+ SDValue Res = DAG.getNode(ISD::OR, DL, VT, Hi, Lo);
+ Res = DAG.getNode(IsFSHR ? ISD::SRL : ISD::SHL, DL, VT, Res, Amount);
+ if (!IsFSHR)
+ Res = DAG.getNode(ISD::SRL, DL, VT, Res, HiShift);
+ return Res;
+ }
+
+ // Shift Lo up to occupy the upper bits of the promoted type.
+ SDValue ShiftOffset = DAG.getConstant(NewBits - OldBits, DL, VT);
+ Lo = DAG.getNode(ISD::SHL, DL, VT, Lo, ShiftOffset);
+
+ // Increase Amount to shift the result into the lower bits of the promoted
+ // type.
+ if (IsFSHR)
+ Amount = DAG.getNode(ISD::ADD, DL, VT, Amount, ShiftOffset);
+
+ return DAG.getNode(Opcode, DL, VT, Hi, Lo, Amount);
+}
+
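For the wide-enough case, a worked instance of the double-shift formula: fshl on i8 promoted to i32 (a sketch; NewBits = 32 >= 2 * OldBits = 16, so no intermediate bits are lost):

    #include <cstdint>

    uint8_t fshl8_via_i32(uint8_t X, uint8_t Y, uint8_t Z) {
      uint32_t Concat = ((uint32_t)X << 8) | Y;   // (aext(x) << bw) | zext(y)
      uint32_t Shifted = Concat << (Z % 8);       // << (z % bw)
      return (uint8_t)(Shifted >> 8);             // >> bw, then truncate
    }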
SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Res;
@@ -1181,7 +1286,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
}
// Handle promotion for the ADDE/SUBE/ADDCARRY/SUBCARRY nodes. Notice that
-// the third operand of ADDE/SUBE nodes is carry flag, which differs from
+// the third operand of ADDE/SUBE nodes is a carry flag, which differs from
 // the ADDCARRY/SUBCARRY nodes in that the third operand is a carry Boolean.
SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) {
if (ResNo == 1)
@@ -1212,6 +1317,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) {
return SDValue(Res.getNode(), 0);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO_CARRY(SDNode *N,
+ unsigned ResNo) {
+ assert(ResNo == 1 && "Don't know how to promote other results yet.");
+ return PromoteIntRes_Overflow(N);
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_ABS(SDNode *N) {
SDValue Op0 = SExtPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::ABS, SDLoc(N), Op0.getValueType(), Op0);
@@ -1394,6 +1505,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::ROTL:
case ISD::ROTR: Res = PromoteIntOp_Shift(N); break;
+ case ISD::SADDO_CARRY:
+ case ISD::SSUBO_CARRY:
case ISD::ADDCARRY:
case ISD::SUBCARRY: Res = PromoteIntOp_ADDSUBCARRY(N, OpNo); break;
@@ -1620,8 +1733,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
EVT OpTy = N->getOperand(1).getValueType();
if (N->getOpcode() == ISD::VSELECT)
- if (SDValue Res = WidenVSELECTAndMask(N))
- return Res;
+ if (SDValue Res = WidenVSELECTMask(N))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
+ Res, N->getOperand(1), N->getOperand(2));
// Promote all the way up to the canonical SetCC type.
EVT OpVT = N->getOpcode() == ISD::SELECT ? OpTy.getScalarType() : OpTy;
@@ -1763,6 +1877,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N,
SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
unsigned OpNo) {
+ bool TruncateStore = N->isTruncatingStore();
SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
if (OpNo == 2) {
// The Mask
@@ -1775,9 +1890,17 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
else
NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo));
- } else
+
+ N->setIndexType(TLI.getCanonicalIndexType(N->getIndexType(),
+ N->getMemoryVT(), NewOps[OpNo]));
+ } else {
NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
- return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+ TruncateStore = true;
+ }
+
+ return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), N->getMemoryVT(),
+ SDLoc(N), NewOps, N->getMemOperand(),
+ N->getIndexType(), TruncateStore);
}
SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
@@ -1921,6 +2044,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break;
case ISD::BITREVERSE: ExpandIntRes_BITREVERSE(N, Lo, Hi); break;
case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break;
+ case ISD::PARITY: ExpandIntRes_PARITY(N, Lo, Hi); break;
case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break;
case ISD::ABS: ExpandIntRes_ABS(N, Lo, Hi); break;
case ISD::CTLZ_ZERO_UNDEF:
@@ -1933,6 +2057,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT: ExpandIntRes_FP_TO_XINT_SAT(N, Lo, Hi); break;
case ISD::STRICT_LLROUND:
case ISD::STRICT_LLRINT:
case ISD::LLROUND:
@@ -2009,6 +2135,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ADDCARRY:
case ISD::SUBCARRY: ExpandIntRes_ADDSUBCARRY(N, Lo, Hi); break;
+ case ISD::SADDO_CARRY:
+ case ISD::SSUBO_CARRY: ExpandIntRes_SADDSUBO_CARRY(N, Lo, Hi); break;
+
case ISD::SHL:
case ISD::SRA:
case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break;
@@ -2025,6 +2154,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SSUBSAT:
case ISD::USUBSAT: ExpandIntRes_ADDSUBSAT(N, Lo, Hi); break;
+ case ISD::SSHLSAT:
+ case ISD::USHLSAT: ExpandIntRes_SHLSAT(N, Lo, Hi); break;
+
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
@@ -2044,6 +2176,16 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN: ExpandIntRes_VECREDUCE(N, Lo, Hi); break;
+
+ case ISD::ROTL:
+ case ISD::ROTR:
+ ExpandIntRes_Rotate(N, Lo, Hi);
+ break;
+
+ case ISD::FSHL:
+ case ISD::FSHR:
+ ExpandIntRes_FunnelShift(N, Lo, Hi);
+ break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -2055,12 +2197,22 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
unsigned Opc = Node->getOpcode();
MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
- RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!");
-
+ AtomicOrdering order = cast<AtomicSDNode>(Node)->getOrdering();
+  // Lower to an outline atomic libcall if outline atomics are enabled,
+  // or to a sync libcall otherwise.
+ RTLIB::Libcall LC = RTLIB::getOUTLINE_ATOMIC(Opc, order, VT);
EVT RetVT = Node->getValueType(0);
- SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end());
TargetLowering::MakeLibCallOptions CallOptions;
+ SmallVector<SDValue, 4> Ops;
+ if (TLI.getLibcallName(LC)) {
+ Ops.append(Node->op_begin() + 2, Node->op_end());
+ Ops.push_back(Node->getOperand(1));
+ } else {
+ LC = RTLIB::getSYNC(Opc, VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+ "Unexpected atomic op or value type!");
+ Ops.append(Node->op_begin() + 1, Node->op_end());
+ }
return TLI.makeLibCall(DAG, LC, RetVT, Ops, CallOptions, SDLoc(Node),
Node->getOperand(0));
}
@@ -2619,6 +2771,26 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBCARRY(SDNode *N,
ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
}
+void DAGTypeLegalizer::ExpandIntRes_SADDSUBO_CARRY(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ SDLoc dl(N);
+ GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+ GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), N->getValueType(1));
+
+ // We need to use an unsigned carry op for the lo part.
+ unsigned CarryOp = N->getOpcode() == ISD::SADDO_CARRY ? ISD::ADDCARRY
+ : ISD::SUBCARRY;
+ Lo = DAG.getNode(CarryOp, dl, VTList, { LHSL, RHSL, N->getOperand(2) });
+ Hi = DAG.getNode(N->getOpcode(), dl, VTList, { LHSH, RHSH, Lo.getValue(1) });
+
+  // Legalize the flag result - switch anything that used the old flag to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
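A minimal scalar sketch of the split performed above (plain C++, not LLVM API; sadd128_carry is an illustrative name): the low half is a plain unsigned add whose carry-out feeds the high half, and only the high half decides signed overflow.

#include <cstdint>

bool sadd128_carry(uint64_t LHSL, int64_t LHSH, uint64_t RHSL, int64_t RHSH,
                   bool CarryIn, uint64_t &Lo, int64_t &Hi) {
  Lo = LHSL + RHSL + (CarryIn ? 1 : 0);                        // ADDCARRY
  bool MidCarry = Lo < LHSL || (CarryIn && Lo == LHSL);
  Hi = (int64_t)((uint64_t)LHSH + (uint64_t)RHSH + MidCarry);  // SADDO_CARRY
  bool SignsMatch = (LHSH < 0) == (RHSH < 0);
  return SignsMatch && ((LHSH < 0) != (Hi < 0));  // signed overflow flag
}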
void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
@@ -2700,6 +2872,17 @@ void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N,
Hi = DAG.getNode(ISD::BSWAP, dl, Hi.getValueType(), Hi);
}
+void DAGTypeLegalizer::ExpandIntRes_PARITY(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ // parity(HiLo) -> parity(Lo^Hi)
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
+ EVT NVT = Lo.getValueType();
+ Lo =
+ DAG.getNode(ISD::PARITY, dl, NVT, DAG.getNode(ISD::XOR, dl, NVT, Lo, Hi));
+ Hi = DAG.getConstant(0, dl, NVT);
+}
+
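The identity in the comment above holds because XOR preserves the total set-bit count modulo 2; a minimal scalar check (plain C++20, not LLVM API; helper names illustrative):

#include <bit>  // std::popcount (C++20)
#include <cstdint>

bool parity64(uint64_t V) { return std::popcount(V) & 1; }

bool parity64_expanded(uint64_t V) {
  uint32_t Lo = (uint32_t)V, Hi = (uint32_t)(V >> 32);
  return std::popcount(Lo ^ Hi) & 1;  // agrees with parity64 for all V
}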
void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
@@ -2717,16 +2900,38 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,
void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
+ SDValue N0 = N->getOperand(0);
+ GetExpandedInteger(N0, Lo, Hi);
+ EVT NVT = Lo.getValueType();
+
+ // If we have ADDCARRY, use the expanded form of the sra+add+xor sequence we
+ // use in LegalizeDAG. The ADD part of the expansion is based on
+ // ExpandIntRes_ADDSUB which also uses ADDCARRY/UADDO after checking that
+ // ADDCARRY is LegalOrCustom. Each of the pieces here can be further expanded
+ // if needed. Shift expansion has a special case for filling with sign bits
+ // so that we will only end up with one SRA.
+ bool HasAddCarry = TLI.isOperationLegalOrCustom(
+ ISD::ADDCARRY, TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
+ if (HasAddCarry) {
+ EVT ShiftAmtTy = getShiftAmountTyForConstant(NVT, TLI, DAG);
+ SDValue Sign =
+ DAG.getNode(ISD::SRA, dl, NVT, Hi,
+ DAG.getConstant(NVT.getSizeInBits() - 1, dl, ShiftAmtTy));
+ SDVTList VTList = DAG.getVTList(NVT, getSetCCResultType(NVT));
+ Lo = DAG.getNode(ISD::UADDO, dl, VTList, Lo, Sign);
+ Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Hi, Sign, Lo.getValue(1));
+ Lo = DAG.getNode(ISD::XOR, dl, NVT, Lo, Sign);
+ Hi = DAG.getNode(ISD::XOR, dl, NVT, Hi, Sign);
+ return;
+ }
+
// abs(HiLo) -> (Hi < 0 ? -HiLo : HiLo)
EVT VT = N->getValueType(0);
- SDValue N0 = N->getOperand(0);
SDValue Neg = DAG.getNode(ISD::SUB, dl, VT,
DAG.getConstant(0, dl, VT), N0);
SDValue NegLo, NegHi;
SplitInteger(Neg, NegLo, NegHi);
- GetExpandedInteger(N0, Lo, Hi);
- EVT NVT = Lo.getValueType();
SDValue HiIsNeg = DAG.getSetCC(dl, getSetCCResultType(NVT),
DAG.getConstant(0, dl, NVT), Hi, ISD::SETGT);
Lo = DAG.getSelect(dl, NVT, HiIsNeg, NegLo, Lo);
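A minimal scalar sketch of the sra+add+xor sequence described above (plain C++, not LLVM API; assumes arithmetic right shift on signed values, which is what SRA provides): Sign is all-ones for negative inputs and zero otherwise, so the add/xor pair conditionally negates.

#include <cstdint>

int64_t abs_sra_add_xor(int64_t X) {
  int64_t Sign = X >> 63;    // SRA: 0 for X >= 0, -1 for X < 0
  return (X + Sign) ^ Sign;  // X >= 0: unchanged; X < 0: ~(X - 1) == -X
}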
@@ -2859,6 +3064,12 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
ReplaceValueWith(SDValue(N, 1), Tmp.second);
}
+void DAGTypeLegalizer::ExpandIntRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Res = TLI.expandFP_TO_INT_SAT(N, DAG);
+ SplitInteger(Res, Lo, Hi);
+}
+
void DAGTypeLegalizer::ExpandIntRes_LLROUND_LLRINT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Op = N->getOperand(N->isStrictFPOpcode() ? 1 : 0);
@@ -2929,7 +3140,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
ReplaceValueWith(SDValue(N, 1), Swap.getValue(2));
return;
}
-
+
if (ISD::isNormalLoad(N)) {
ExpandRes_NormalLoad(N, Lo, Hi);
return;
@@ -2983,7 +3194,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits()/8;
- Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
N->getOriginalAlign(), MMOFlags, AAInfo);
@@ -3007,7 +3218,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
N->getOriginalAlign(), MMOFlags, AAInfo);
// Increment the pointer to the other half.
- Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
// Load the rest of the low bits.
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
@@ -3147,6 +3358,12 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBSAT(SDNode *N, SDValue &Lo,
SplitInteger(Result, Lo, Hi);
}
+void DAGTypeLegalizer::ExpandIntRes_SHLSAT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Result = TLI.expandShlSat(N, DAG);
+ SplitInteger(Result, Lo, Hi);
+}
+
/// This performs an expansion of the integer result for a fixed point
/// multiplication. The default expansion performs rounding down towards
/// negative infinity, though targets that do care about rounding should specify
@@ -3385,40 +3602,66 @@ void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
SDValue RHS = Node->getOperand(1);
SDLoc dl(Node);
- // Expand the result by simply replacing it with the equivalent
- // non-overflow-checking operation.
- SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
- ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
- LHS, RHS);
- SplitInteger(Sum, Lo, Hi);
+ SDValue Ovf;
- // Compute the overflow.
- //
- // LHSSign -> LHS >= 0
- // RHSSign -> RHS >= 0
- // SumSign -> Sum >= 0
- //
- // Add:
- // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
- // Sub:
- // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
- //
- EVT OType = Node->getValueType(1);
- SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
+ unsigned CarryOp;
+ switch(Node->getOpcode()) {
+  switch (Node->getOpcode()) {
+ case ISD::SADDO: CarryOp = ISD::SADDO_CARRY; break;
+ case ISD::SSUBO: CarryOp = ISD::SSUBO_CARRY; break;
+ }
+
+ bool HasCarryOp = TLI.isOperationLegalOrCustom(
+ CarryOp, TLI.getTypeToExpandTo(*DAG.getContext(), LHS.getValueType()));
- SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
- SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
- SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
- Node->getOpcode() == ISD::SADDO ?
- ISD::SETEQ : ISD::SETNE);
+ if (HasCarryOp) {
+ // Expand the subcomponents.
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ GetExpandedInteger(LHS, LHSL, LHSH);
+ GetExpandedInteger(RHS, RHSL, RHSH);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), Node->getValueType(1));
+
+ Lo = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
+ ISD::UADDO : ISD::USUBO, dl, VTList, { LHSL, RHSL });
+ Hi = DAG.getNode(CarryOp, dl, VTList, { LHSH, RHSH, Lo.getValue(1) });
+
+ Ovf = Hi.getValue(1);
+ } else {
+ // Expand the result by simply replacing it with the equivalent
+ // non-overflow-checking operation.
+ SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ SplitInteger(Sum, Lo, Hi);
+
+ // Compute the overflow.
+ //
+ // LHSSign -> LHS >= 0
+ // RHSSign -> RHS >= 0
+ // SumSign -> Sum >= 0
+ //
+ // Add:
+ // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+ // Sub:
+ // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
+ //
+ EVT OType = Node->getValueType(1);
+ SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
+
+ SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
+ SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
+ SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
+ Node->getOpcode() == ISD::SADDO ?
+ ISD::SETEQ : ISD::SETNE);
- SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE);
- SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
+ SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE);
+ SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
- SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
+ Ovf = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
+ }
// Use the calculated overflow everywhere.
- ReplaceValueWith(SDValue(Node, 1), Cmp);
+ ReplaceValueWith(SDValue(Node, 1), Ovf);
}
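The sign rule restated in the fallback comments can be checked on a scalar; a minimal sketch (plain C++, not LLVM API; saddo32 is an illustrative name): signed addition overflows exactly when the operands share a sign and the wrapped sum's sign differs from it.

#include <cstdint>

bool saddo32(int32_t LHS, int32_t RHS, int32_t &Sum) {
  Sum = (int32_t)((uint32_t)LHS + (uint32_t)RHS);  // wrapping add
  bool SignsMatch = (LHS >= 0) == (RHS >= 0);
  bool SumSignNE = (LHS >= 0) != (Sum >= 0);
  return SignsMatch && SumSignNE;  // overflow per the rule above
}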
void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
@@ -3874,6 +4117,22 @@ void DAGTypeLegalizer::ExpandIntRes_VECREDUCE(SDNode *N,
SplitInteger(Res, Lo, Hi);
}
+void DAGTypeLegalizer::ExpandIntRes_Rotate(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // Lower the rotate to shifts and ORs which can be expanded.
+ SDValue Res;
+ TLI.expandROT(N, true /*AllowVectorOps*/, Res, DAG);
+ SplitInteger(Res, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // Lower the funnel shift to shifts and ORs which can be expanded.
+ SDValue Res;
+ TLI.expandFunnelShift(N, Res, DAG);
+ SplitInteger(Res, Lo, Hi);
+}
+
//===----------------------------------------------------------------------===//
// Integer Operand Expansion
//===----------------------------------------------------------------------===//
@@ -4246,7 +4505,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits()/8;
- Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
NEVT, N->getOriginalAlign(), MMOFlags, AAInfo);
@@ -4281,7 +4540,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
N->getOriginalAlign(), MMOFlags, AAInfo);
// Increment the pointer to the other half.
- Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
// Store the lowest ExcessBits bits in the second half.
Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
@@ -4586,8 +4845,23 @@ SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) {
SDLoc dl(N);
+
+ EVT ResVT = N->getValueType(0);
unsigned NumElems = N->getNumOperands();
+ if (ResVT.isScalableVector()) {
+ SDValue ResVec = DAG.getUNDEF(ResVT);
+
+ for (unsigned OpIdx = 0; OpIdx < NumElems; ++OpIdx) {
+ SDValue Op = N->getOperand(OpIdx);
+ unsigned OpNumElts = Op.getValueType().getVectorMinNumElements();
+ ResVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ResVec, Op,
+ DAG.getIntPtrConstant(OpIdx * OpNumElts, dl));
+ }
+
+ return ResVec;
+ }
+
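A one-line scalar model of the index computation used above for scalable results (plain C++, not LLVM API; name illustrative): operand i lands at element i times the per-operand minimum element count.

size_t concatInsertIndex(size_t OpIdx, size_t OpMinNumElts) {
  return OpIdx * OpMinNumElts;  // subvector insertion offset
}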
EVT RetSclrTy = N->getValueType(0).getVectorElementType();
SmallVector<SDValue, 8> NewOps;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index ae087d3bbd8c..a59f03854775 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -663,8 +663,7 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
// Process the list of nodes that need to be reanalyzed.
while (!NodesToAnalyze.empty()) {
- SDNode *N = NodesToAnalyze.back();
- NodesToAnalyze.pop_back();
+ SDNode *N = NodesToAnalyze.pop_back_val();
if (N->getNodeId() != DAGTypeLegalizer::NewNode)
// The node was analyzed while reanalyzing an earlier node - it is safe
// to skip. Note that this is not a morphing node - otherwise it would
@@ -753,7 +752,10 @@ void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
  // Note that in some cases vector operation operands may be wider than
  // the vector element type. For example BUILD_VECTOR of type <1 x i1> with
// a constant i8 operand.
- assert(Result.getValueSizeInBits() >= Op.getScalarValueSizeInBits() &&
+
+ // We don't currently support the scalarization of scalable vector types.
+ assert(Result.getValueSizeInBits().getFixedSize() >=
+ Op.getScalarValueSizeInBits() &&
"Invalid type for scalarized vector");
AnalyzeNewValue(Result);
@@ -955,11 +957,12 @@ bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) {
assert(Results.size() == N->getNumValues() &&
"Custom lowering returned the wrong number of results!");
for (unsigned i = 0, e = Results.size(); i != e; ++i) {
- // If this is a chain output just replace it.
- if (Results[i].getValueType() == MVT::Other)
- ReplaceValueWith(SDValue(N, i), Results[i]);
- else
+    // If the result was widened, record the widened value; otherwise (e.g. a
+    // chain output) just replace it.
+ bool WasWidened = SDValue(N, i).getValueType() != Results[i].getValueType();
+ if (WasWidened)
SetWidenedVector(SDValue(N, i), Results[i]);
+ else
+ ReplaceValueWith(SDValue(N, i), Results[i]);
}
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 0fa6d653a836..630a0a9adaf7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -311,10 +311,11 @@ private:
SDValue PromoteIntRes_BUILD_PAIR(SDNode *N);
SDValue PromoteIntRes_Constant(SDNode *N);
SDValue PromoteIntRes_CTLZ(SDNode *N);
- SDValue PromoteIntRes_CTPOP(SDNode *N);
+ SDValue PromoteIntRes_CTPOP_PARITY(SDNode *N);
SDValue PromoteIntRes_CTTZ(SDNode *N);
SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
+ SDValue PromoteIntRes_FP_TO_XINT_SAT(SDNode *N);
SDValue PromoteIntRes_FP_TO_FP16(SDNode *N);
SDValue PromoteIntRes_FREEZE(SDNode *N);
SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
@@ -331,22 +332,26 @@ private:
SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N);
SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N);
SDValue PromoteIntRes_SExtIntBinOp(SDNode *N);
+ SDValue PromoteIntRes_UMINUMAX(SDNode *N);
SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N);
SDValue PromoteIntRes_SRA(SDNode *N);
SDValue PromoteIntRes_SRL(SDNode *N);
SDValue PromoteIntRes_TRUNCATE(SDNode *N);
SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_SADDSUBO_CARRY(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_UNDEF(SDNode *N);
SDValue PromoteIntRes_VAARG(SDNode *N);
SDValue PromoteIntRes_VSCALE(SDNode *N);
SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
- SDValue PromoteIntRes_ADDSUBSAT(SDNode *N);
+ SDValue PromoteIntRes_ADDSUBSHLSAT(SDNode *N);
SDValue PromoteIntRes_MULFIX(SDNode *N);
SDValue PromoteIntRes_DIVFIX(SDNode *N);
SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N);
SDValue PromoteIntRes_VECREDUCE(SDNode *N);
SDValue PromoteIntRes_ABS(SDNode *N);
+ SDValue PromoteIntRes_Rotate(SDNode *N);
+ SDValue PromoteIntRes_FunnelShift(SDNode *N);
// Integer Operand Promotion.
bool PromoteIntegerOperand(SDNode *N, unsigned OpNo);
@@ -420,6 +425,7 @@ private:
void ExpandIntRes_FLT_ROUNDS (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_FP_TO_XINT_SAT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_LLROUND_LLRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -427,8 +433,10 @@ private:
void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBCARRY (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SADDSUBO_CARRY (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_BITREVERSE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_PARITY (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SREM (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -442,12 +450,16 @@ private:
void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBSAT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SHLSAT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_MULFIX (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_DIVFIX (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_VECREDUCE (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_Rotate (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_FunnelShift (SDNode *N, SDValue &Lo, SDValue &Hi);
+
void ExpandShiftByConstant(SDNode *N, const APInt &Amt,
SDValue &Lo, SDValue &Hi);
bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -541,6 +553,8 @@ private:
SDValue SoftenFloatRes_UNDEF(SDNode *N);
SDValue SoftenFloatRes_VAARG(SDNode *N);
SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N);
+ SDValue SoftenFloatRes_VECREDUCE(SDNode *N);
+ SDValue SoftenFloatRes_VECREDUCE_SEQ(SDNode *N);
// Convert Float Operand to Integer.
bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
@@ -549,6 +563,7 @@ private:
SDValue SoftenFloatOp_BR_CC(SDNode *N);
SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N);
+ SDValue SoftenFloatOp_FP_TO_XINT_SAT(SDNode *N);
SDValue SoftenFloatOp_LROUND(SDNode *N);
SDValue SoftenFloatOp_LLROUND(SDNode *N);
SDValue SoftenFloatOp_LRINT(SDNode *N);
@@ -617,8 +632,7 @@ private:
SDValue ExpandFloatOp_BR_CC(SDNode *N);
SDValue ExpandFloatOp_FCOPYSIGN(SDNode *N);
SDValue ExpandFloatOp_FP_ROUND(SDNode *N);
- SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N);
- SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N);
+ SDValue ExpandFloatOp_FP_TO_XINT(SDNode *N);
SDValue ExpandFloatOp_LROUND(SDNode *N);
SDValue ExpandFloatOp_LLROUND(SDNode *N);
SDValue ExpandFloatOp_LRINT(SDNode *N);
@@ -628,7 +642,8 @@ private:
SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo);
void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
- ISD::CondCode &CCCode, const SDLoc &dl);
+ ISD::CondCode &CCCode, const SDLoc &dl,
+ SDValue &Chain, bool IsSignaling = false);
//===--------------------------------------------------------------------===//
// Float promotion support: LegalizeFloatTypes.cpp
@@ -658,12 +673,15 @@ private:
SDValue PromoteFloatRes_UNDEF(SDNode *N);
SDValue BitcastToInt_ATOMIC_SWAP(SDNode *N);
SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N);
+ SDValue PromoteFloatRes_VECREDUCE(SDNode *N);
+ SDValue PromoteFloatRes_VECREDUCE_SEQ(SDNode *N);
bool PromoteFloatOperand(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo);
+ SDValue PromoteFloatOp_FP_TO_XINT_SAT(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo);
SDValue PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo);
@@ -695,12 +713,15 @@ private:
SDValue SoftPromoteHalfRes_UnaryOp(SDNode *N);
SDValue SoftPromoteHalfRes_XINT_TO_FP(SDNode *N);
SDValue SoftPromoteHalfRes_UNDEF(SDNode *N);
+ SDValue SoftPromoteHalfRes_VECREDUCE(SDNode *N);
+ SDValue SoftPromoteHalfRes_VECREDUCE_SEQ(SDNode *N);
bool SoftPromoteHalfOperand(SDNode *N, unsigned OpNo);
SDValue SoftPromoteHalfOp_BITCAST(SDNode *N);
SDValue SoftPromoteHalfOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
SDValue SoftPromoteHalfOp_FP_EXTEND(SDNode *N);
SDValue SoftPromoteHalfOp_FP_TO_XINT(SDNode *N);
+ SDValue SoftPromoteHalfOp_FP_TO_XINT_SAT(SDNode *N);
SDValue SoftPromoteHalfOp_SETCC(SDNode *N);
SDValue SoftPromoteHalfOp_SELECT_CC(SDNode *N, unsigned OpNo);
SDValue SoftPromoteHalfOp_STORE(SDNode *N, unsigned OpNo);
@@ -745,6 +766,7 @@ private:
SDValue ScalarizeVecRes_SETCC(SDNode *N);
SDValue ScalarizeVecRes_UNDEF(SDNode *N);
SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N);
+ SDValue ScalarizeVecRes_FP_TO_XINT_SAT(SDNode *N);
SDValue ScalarizeVecRes_FIX(SDNode *N);
@@ -760,7 +782,10 @@ private:
SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo);
SDValue ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecOp_FP_EXTEND(SDNode *N);
+ SDValue ScalarizeVecOp_STRICT_FP_EXTEND(SDNode *N);
SDValue ScalarizeVecOp_VECREDUCE(SDNode *N);
+ SDValue ScalarizeVecOp_VECREDUCE_SEQ(SDNode *N);
//===--------------------------------------------------------------------===//
// Vector Splitting Support: LegalizeVectorTypes.cpp
@@ -778,8 +803,8 @@ private:
// Helper function for incrementing the pointer when splitting
// memory operations
- void IncrementPointer(MemSDNode *N, EVT MemVT,
- MachinePointerInfo &MPI, SDValue &Ptr);
+ void IncrementPointer(MemSDNode *N, EVT MemVT, MachinePointerInfo &MPI,
+ SDValue &Ptr, uint64_t *ScaledOffset = nullptr);
// Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>.
void SplitVectorResult(SDNode *N, unsigned ResNo);
@@ -806,20 +831,23 @@ private:
void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi);
void SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue &Lo, SDValue &Hi);
- void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
SDValue &Hi);
void SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo, SDValue &Hi);
// Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>.
bool SplitVectorOperand(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_VECREDUCE_SEQ(SDNode *N);
SDValue SplitVecOp_UnaryOp(SDNode *N);
SDValue SplitVecOp_TruncateHelper(SDNode *N);
SDValue SplitVecOp_BITCAST(SDNode *N);
+ SDValue SplitVecOp_INSERT_SUBVECTOR(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue SplitVecOp_ExtVecInRegOp(SDNode *N);
@@ -831,6 +859,7 @@ private:
SDValue SplitVecOp_VSETCC(SDNode *N);
SDValue SplitVecOp_FP_ROUND(SDNode *N);
SDValue SplitVecOp_FCOPYSIGN(SDNode *N);
+ SDValue SplitVecOp_FP_TO_XINT_SAT(SDNode *N);
//===--------------------------------------------------------------------===//
// Vector Widening Support: LegalizeVectorTypes.cpp
@@ -862,9 +891,9 @@ private:
SDValue WidenVecRes_LOAD(SDNode* N);
SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N);
SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N);
- SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
+ SDValue WidenVecRes_ScalarOp(SDNode* N);
SDValue WidenVecRes_SELECT(SDNode* N);
- SDValue WidenVSELECTAndMask(SDNode *N);
+ SDValue WidenVSELECTMask(SDNode *N);
SDValue WidenVecRes_SELECT_CC(SDNode* N);
SDValue WidenVecRes_SETCC(SDNode* N);
SDValue WidenVecRes_STRICT_FSETCC(SDNode* N);
@@ -879,9 +908,9 @@ private:
SDValue WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo);
SDValue WidenVecRes_Convert(SDNode *N);
SDValue WidenVecRes_Convert_StrictFP(SDNode *N);
+ SDValue WidenVecRes_FP_TO_XINT_SAT(SDNode *N);
SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
SDValue WidenVecRes_POWI(SDNode *N);
- SDValue WidenVecRes_Shift(SDNode *N);
SDValue WidenVecRes_Unary(SDNode *N);
SDValue WidenVecRes_InregOp(SDNode *N);
@@ -901,8 +930,10 @@ private:
SDValue WidenVecOp_VSELECT(SDNode *N);
SDValue WidenVecOp_Convert(SDNode *N);
+ SDValue WidenVecOp_FP_TO_XINT_SAT(SDNode *N);
SDValue WidenVecOp_FCOPYSIGN(SDNode *N);
SDValue WidenVecOp_VECREDUCE(SDNode *N);
+ SDValue WidenVecOp_VECREDUCE_SEQ(SDNode *N);
/// Helper function to generate a set of operations to perform
/// a vector operation for a wider type.
@@ -934,13 +965,6 @@ private:
/// ST: store of a widen value
void GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST);
- /// Helper function to generate a set of stores to store a truncate widen
- /// vector into non-widen memory.
- /// StChain: list of chains for the stores we have generated
- /// ST: store of a widen value
- void GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
- StoreSDNode *ST);
-
  /// Modifies a vector input (widens or narrows) to a vector of NVT. The
/// input vector must have the same element type as NVT.
/// When FillWithZeroes is "on" the vector will be widened with zeroes.
@@ -980,8 +1004,6 @@ private:
void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_FREEZE (SDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitVSETCC(const SDNode *N);
-
//===--------------------------------------------------------------------===//
// Generic Expansion: LegalizeTypesGeneric.cpp
//===--------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 9cd3b8f76d6c..81cc2bf10d25 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -175,7 +175,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Increment the pointer to the other half.
unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
- StackPtr = DAG.getMemBasePlusOffset(StackPtr, IncrementSize, dl);
+ StackPtr =
+ DAG.getMemBasePlusOffset(StackPtr, TypeSize::Fixed(IncrementSize), dl);
// Load the second half from the stack slot.
Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
@@ -266,7 +267,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits() / 8;
- Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl);
Hi = DAG.getLoad(
NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize),
LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), AAInfo);
@@ -481,7 +482,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
St->getOriginalAlign(), St->getMemOperand()->getFlags(),
AAInfo);
- Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
Hi = DAG.getStore(
Chain, dl, Hi, Ptr, St->getPointerInfo().getWithOffset(IncrementSize),
St->getOriginalAlign(), St->getMemOperand()->getFlags(), AAInfo);
@@ -514,8 +515,8 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue Cond = N->getOperand(0);
CL = CH = Cond;
if (Cond.getValueType().isVector()) {
- if (SDValue Res = WidenVSELECTAndMask(N))
- std::tie(CL, CH) = DAG.SplitVector(Res->getOperand(0), dl);
+ if (SDValue Res = WidenVSELECTMask(N))
+ std::tie(CL, CH) = DAG.SplitVector(Res, dl);
    // Check if there are already split versions of the vector available and
// use those instead of splitting the mask operand again.
else if (getTypeAction(Cond.getValueType()) ==
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 6409f924920d..4015a5a0ce70 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -143,7 +143,6 @@ class VectorLegalizer {
void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results);
- SDValue ExpandStrictFPOp(SDNode *Node);
void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results);
@@ -454,6 +453,10 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::UADDSAT:
case ISD::SSUBSAT:
case ISD::USUBSAT:
+ case ISD::SSHLSAT:
+ case ISD::USHLSAT:
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
break;
case ISD::SMULFIX:
@@ -487,6 +490,11 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(0).getValueType());
break;
+ case ISD::VECREDUCE_SEQ_FADD:
+ case ISD::VECREDUCE_SEQ_FMUL:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(1).getValueType());
+ break;
}
LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
@@ -794,7 +802,7 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
break;
case ISD::ROTL:
case ISD::ROTR:
- if (TLI.expandROT(Node, Tmp, DAG)) {
+ if (TLI.expandROT(Node, false /*AllowVectorOps*/, Tmp, DAG)) {
Results.push_back(Tmp);
return;
}
@@ -806,6 +814,15 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
return;
}
break;
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
+ if (SDValue Expanded = TLI.expandIntMINMAX(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
case ISD::UADDO:
case ISD::USUBO:
ExpandUADDSUBO(Node, Results);
@@ -868,6 +885,10 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
case ISD::VECREDUCE_FMIN:
Results.push_back(TLI.expandVecReduce(Node, DAG));
return;
+ case ISD::VECREDUCE_SEQ_FADD:
+ case ISD::VECREDUCE_SEQ_FMUL:
+ Results.push_back(TLI.expandVecReduceSeq(Node, DAG));
+ return;
case ISD::SREM:
case ISD::UREM:
ExpandREM(Node, Results);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index c81d03cac81b..57cb364f1939 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -129,6 +129,8 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::UADDSAT:
case ISD::SSUBSAT:
case ISD::USUBSAT:
+ case ISD::SSHLSAT:
+ case ISD::USHLSAT:
case ISD::FPOW:
case ISD::FREM:
@@ -144,9 +146,13 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
R = ScalarizeVecRes_BinOp(N);
break;
case ISD::FMA:
+ case ISD::FSHL:
+ case ISD::FSHR:
R = ScalarizeVecRes_TernaryOp(N);
break;
@@ -156,6 +162,11 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
R = ScalarizeVecRes_StrictFPOp(N);
break;
+ case ISD::FP_TO_UINT_SAT:
+ case ISD::FP_TO_SINT_SAT:
+ R = ScalarizeVecRes_FP_TO_XINT_SAT(N);
+ break;
+
case ISD::UADDO:
case ISD::SADDO:
case ISD::USUBO:
@@ -510,6 +521,23 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {
return GetScalarizedVector(N->getOperand(Op));
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_TO_XINT_SAT(SDNode *N) {
+ SDValue Src = N->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ SDLoc dl(N);
+
+  // Handle the case where the result is scalarized but the operand is not.
+ if (getTypeAction(SrcVT) == TargetLowering::TypeScalarizeVector)
+ Src = GetScalarizedVector(Src);
+ else
+ Src = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, SrcVT.getVectorElementType(), Src,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+ EVT DstVT = N->getValueType(0).getVectorElementType();
+ return DAG.getNode(N->getOpcode(), dl, DstVT, Src, N->getOperand(1));
+}
+
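For reference, the per-element operation each lane reduces to above is a saturating conversion; a minimal scalar model (plain C++, not LLVM API; fptosi_sat_i32 is an illustrative name), assuming the usual FP_TO_SINT_SAT semantics (NaN maps to zero, out-of-range inputs clamp):

#include <cmath>
#include <cstdint>
#include <limits>

int32_t fptosi_sat_i32(float F) {
  if (std::isnan(F))
    return 0;                                    // NaN saturates to zero
  if (F <= -2147483648.0f)                       // (float)INT32_MIN, exactly
    return std::numeric_limits<int32_t>::min();
  if (F >= 2147483648.0f)                        // 2^31; any smaller float fits
    return std::numeric_limits<int32_t>::max();
  return (int32_t)F;
}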
SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {
assert(N->getValueType(0).isVector() &&
N->getOperand(0).getValueType().isVector() &&
@@ -552,72 +580,80 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
dbgs() << "\n");
SDValue Res = SDValue();
- if (!Res.getNode()) {
- switch (N->getOpcode()) {
- default:
+ switch (N->getOpcode()) {
+ default:
#ifndef NDEBUG
- dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": ";
- N->dump(&DAG);
- dbgs() << "\n";
+ dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
#endif
- report_fatal_error("Do not know how to scalarize this operator's "
- "operand!\n");
- case ISD::BITCAST:
- Res = ScalarizeVecOp_BITCAST(N);
- break;
- case ISD::ANY_EXTEND:
- case ISD::ZERO_EXTEND:
- case ISD::SIGN_EXTEND:
- case ISD::TRUNCATE:
- case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT:
- case ISD::SINT_TO_FP:
- case ISD::UINT_TO_FP:
- Res = ScalarizeVecOp_UnaryOp(N);
- break;
- case ISD::STRICT_SINT_TO_FP:
- case ISD::STRICT_UINT_TO_FP:
- case ISD::STRICT_FP_TO_SINT:
- case ISD::STRICT_FP_TO_UINT:
- Res = ScalarizeVecOp_UnaryOp_StrictFP(N);
- break;
- case ISD::CONCAT_VECTORS:
- Res = ScalarizeVecOp_CONCAT_VECTORS(N);
- break;
- case ISD::EXTRACT_VECTOR_ELT:
- Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N);
- break;
- case ISD::VSELECT:
- Res = ScalarizeVecOp_VSELECT(N);
- break;
- case ISD::SETCC:
- Res = ScalarizeVecOp_VSETCC(N);
- break;
- case ISD::STORE:
- Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
- break;
- case ISD::STRICT_FP_ROUND:
- Res = ScalarizeVecOp_STRICT_FP_ROUND(N, OpNo);
- break;
- case ISD::FP_ROUND:
- Res = ScalarizeVecOp_FP_ROUND(N, OpNo);
- break;
- case ISD::VECREDUCE_FADD:
- case ISD::VECREDUCE_FMUL:
- case ISD::VECREDUCE_ADD:
- case ISD::VECREDUCE_MUL:
- case ISD::VECREDUCE_AND:
- case ISD::VECREDUCE_OR:
- case ISD::VECREDUCE_XOR:
- case ISD::VECREDUCE_SMAX:
- case ISD::VECREDUCE_SMIN:
- case ISD::VECREDUCE_UMAX:
- case ISD::VECREDUCE_UMIN:
- case ISD::VECREDUCE_FMAX:
- case ISD::VECREDUCE_FMIN:
- Res = ScalarizeVecOp_VECREDUCE(N);
- break;
- }
+ report_fatal_error("Do not know how to scalarize this operator's "
+ "operand!\n");
+ case ISD::BITCAST:
+ Res = ScalarizeVecOp_BITCAST(N);
+ break;
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::TRUNCATE:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ Res = ScalarizeVecOp_UnaryOp(N);
+ break;
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
+ Res = ScalarizeVecOp_UnaryOp_StrictFP(N);
+ break;
+ case ISD::CONCAT_VECTORS:
+ Res = ScalarizeVecOp_CONCAT_VECTORS(N);
+ break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N);
+ break;
+ case ISD::VSELECT:
+ Res = ScalarizeVecOp_VSELECT(N);
+ break;
+ case ISD::SETCC:
+ Res = ScalarizeVecOp_VSETCC(N);
+ break;
+ case ISD::STORE:
+ Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
+ break;
+ case ISD::STRICT_FP_ROUND:
+ Res = ScalarizeVecOp_STRICT_FP_ROUND(N, OpNo);
+ break;
+ case ISD::FP_ROUND:
+ Res = ScalarizeVecOp_FP_ROUND(N, OpNo);
+ break;
+ case ISD::STRICT_FP_EXTEND:
+ Res = ScalarizeVecOp_STRICT_FP_EXTEND(N);
+ break;
+ case ISD::FP_EXTEND:
+ Res = ScalarizeVecOp_FP_EXTEND(N);
+ break;
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ Res = ScalarizeVecOp_VECREDUCE(N);
+ break;
+ case ISD::VECREDUCE_SEQ_FADD:
+ case ISD::VECREDUCE_SEQ_FMUL:
+ Res = ScalarizeVecOp_VECREDUCE_SEQ(N);
+ break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -762,6 +798,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
/// If the value to round is a vector that needs to be scalarized, it must be
/// <1 x ty>. Convert the element instead.
SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 0 && "Wrong operand for scalarization!");
SDValue Elt = GetScalarizedVector(N->getOperand(0));
SDValue Res = DAG.getNode(ISD::FP_ROUND, SDLoc(N),
N->getValueType(0).getVectorElementType(), Elt,
@@ -787,7 +824,36 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N,
// handled all replacements since caller can only handle a single result.
ReplaceValueWith(SDValue(N, 0), Res);
return SDValue();
-}
+}
+
+/// If the value to extend is a vector that needs to be scalarized, it must be
+/// <1 x ty>. Convert the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_EXTEND(SDNode *N) {
+ SDValue Elt = GetScalarizedVector(N->getOperand(0));
+ SDValue Res = DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
+ N->getValueType(0).getVectorElementType(), Elt);
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
+}
+
+/// If the value to extend is a vector that needs to be scalarized, it must be
+/// <1 x ty>. Convert the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_EXTEND(SDNode *N) {
+ SDValue Elt = GetScalarizedVector(N->getOperand(1));
+ SDValue Res =
+ DAG.getNode(ISD::STRICT_FP_EXTEND, SDLoc(N),
+ {N->getValueType(0).getVectorElementType(), MVT::Other},
+ {N->getOperand(0), Elt});
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+
+ Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
+
+ // Do our own replacement and return SDValue() to tell the caller that we
+ // handled all replacements since caller can only handle a single result.
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
+}
SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE(SDNode *N) {
SDValue Res = GetScalarizedVector(N->getOperand(0));
@@ -797,6 +863,17 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE(SDNode *N) {
return Res;
}
+SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE_SEQ(SDNode *N) {
+ SDValue AccOp = N->getOperand(0);
+ SDValue VecOp = N->getOperand(1);
+
+ unsigned BaseOpc = ISD::getVecReduceBaseOpcode(N->getOpcode());
+
+ SDValue Op = GetScalarizedVector(VecOp);
+ return DAG.getNode(BaseOpc, SDLoc(N), N->getValueType(0),
+ AccOp, Op, N->getFlags());
+}
+
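A one-line scalar model of what the scalarization above produces for a <1 x float> operand (plain C++, not LLVM API; name illustrative):

float vecreduce_seq_fadd_1elt(float Acc, float Elt) {
  return Acc + Elt;  // VECREDUCE_SEQ_FADD(Acc, <Elt>) == FADD(Acc, Elt)
}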
//===----------------------------------------------------------------------===//
// Result Vector Splitting
//===----------------------------------------------------------------------===//
@@ -836,7 +913,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break;
case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
- case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break;
+ case ISD::SPLAT_VECTOR:
+ case ISD::SCALAR_TO_VECTOR:
+ SplitVecRes_ScalarOp(N, Lo, Hi);
+ break;
case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
case ISD::LOAD:
SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
@@ -939,9 +1019,15 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::UADDSAT:
case ISD::SSUBSAT:
case ISD::USUBSAT:
+ case ISD::SSHLSAT:
+ case ISD::USHLSAT:
+ case ISD::ROTL:
+ case ISD::ROTR:
SplitVecRes_BinOp(N, Lo, Hi);
break;
case ISD::FMA:
+ case ISD::FSHL:
+ case ISD::FSHR:
SplitVecRes_TernaryOp(N, Lo, Hi);
break;
@@ -951,6 +1037,11 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
SplitVecRes_StrictFPOp(N, Lo, Hi);
break;
+ case ISD::FP_TO_UINT_SAT:
+ case ISD::FP_TO_SINT_SAT:
+ SplitVecRes_FP_TO_XINT_SAT(N, Lo, Hi);
+ break;
+
case ISD::UADDO:
case ISD::SADDO:
case ISD::USUBO:
@@ -977,21 +1068,26 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
}
void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,
- MachinePointerInfo &MPI,
- SDValue &Ptr) {
+ MachinePointerInfo &MPI, SDValue &Ptr,
+ uint64_t *ScaledOffset) {
SDLoc DL(N);
unsigned IncrementSize = MemVT.getSizeInBits().getKnownMinSize() / 8;
if (MemVT.isScalableVector()) {
+ SDNodeFlags Flags;
SDValue BytesIncrement = DAG.getVScale(
DL, Ptr.getValueType(),
APInt(Ptr.getValueSizeInBits().getFixedSize(), IncrementSize));
MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace());
- Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, BytesIncrement);
+ Flags.setNoUnsignedWrap(true);
+ if (ScaledOffset)
+ *ScaledOffset += IncrementSize;
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, BytesIncrement,
+ Flags);
} else {
MPI = N->getPointerInfo().getWithOffset(IncrementSize);
// Increment the pointer to the other half.
- Ptr = DAG.getObjectPtrOffset(DL, Ptr, IncrementSize);
+ Ptr = DAG.getObjectPtrOffset(DL, Ptr, TypeSize::Fixed(IncrementSize));
}
}
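A minimal scalar model of the byte increment chosen above (plain C++, not LLVM API; names illustrative): fixed-size halves advance by a constant byte count, scalable halves by vscale times the known minimum size.

#include <cstdint>

uint64_t halfIncrementBytes(uint64_t KnownMinBits, bool IsScalable,
                            uint64_t VScale /* runtime multiple */) {
  uint64_t MinBytes = KnownMinBits / 8;
  return IsScalable ? VScale * MinBytes : MinBytes;
}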
@@ -1200,7 +1296,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
// Increment the pointer to the other part.
unsigned IncrementSize = Lo.getValueSizeInBits() / 8;
- StackPtr = DAG.getMemBasePlusOffset(StackPtr, IncrementSize, dl);
+ StackPtr =
+ DAG.getMemBasePlusOffset(StackPtr, TypeSize::Fixed(IncrementSize), dl);
// Load the Hi part from the stack slot.
Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr,
@@ -1448,14 +1545,16 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
unsigned IdxVal = CIdx->getZExtValue();
- unsigned LoNumElts = Lo.getValueType().getVectorNumElements();
- if (IdxVal < LoNumElts)
+ unsigned LoNumElts = Lo.getValueType().getVectorMinNumElements();
+ if (IdxVal < LoNumElts) {
Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
Lo.getValueType(), Lo, Elt, Idx);
- else
+ return;
+ } else if (!Vec.getValueType().isScalableVector()) {
Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt,
DAG.getVectorIdxConstant(IdxVal - LoNumElts, dl));
- return;
+ return;
+ }
}
// See if the target wants to custom expand this node.
@@ -1468,7 +1567,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
if (VecVT.getScalarSizeInBits() < 8) {
EltVT = MVT::i8;
VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
- VecVT.getVectorNumElements());
+ VecVT.getVectorElementCount());
Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec);
// Extend the element type to match if needed.
if (EltVT.bitsGT(Elt.getValueType()))
@@ -1493,7 +1592,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
SDValue EltPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
Store = DAG.getTruncStore(
Store, dl, Elt, EltPtr, MachinePointerInfo::getUnknownStack(MF), EltVT,
- commonAlignment(SmallestAlign, EltVT.getSizeInBits() / 8));
+ commonAlignment(SmallestAlign,
+ EltVT.getFixedSizeInBits() / 8));
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
@@ -1502,12 +1602,11 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
Lo = DAG.getLoad(LoVT, dl, Store, StackPtr, PtrInfo, SmallestAlign);
// Increment the pointer to the other part.
- unsigned IncrementSize = LoVT.getSizeInBits() / 8;
- StackPtr = DAG.getMemBasePlusOffset(StackPtr, IncrementSize, dl);
+ auto Load = cast<LoadSDNode>(Lo);
+ MachinePointerInfo MPI = Load->getPointerInfo();
+ IncrementPointer(Load, LoVT, MPI, StackPtr);
- // Load the Hi part from the stack slot.
- Hi = DAG.getLoad(HiVT, dl, Store, StackPtr,
- PtrInfo.getWithOffset(IncrementSize), SmallestAlign);
+ Hi = DAG.getLoad(HiVT, dl, Store, StackPtr, MPI, SmallestAlign);
// If we adjusted the original type, we need to truncate the results.
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
@@ -1517,13 +1616,18 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
}
-void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
+void DAGTypeLegalizer::SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
EVT LoVT, HiVT;
SDLoc dl(N);
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
- Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0));
- Hi = DAG.getUNDEF(HiVT);
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, N->getOperand(0));
+ if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ Hi = DAG.getUNDEF(HiVT);
+ } else {
+ assert(N->getOpcode() == ISD::SPLAT_VECTOR && "Unexpected opcode");
+ Hi = Lo;
+ }
}
void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
@@ -1611,9 +1715,10 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
else
std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
+ unsigned LoSize = MemoryLocation::getSizeOrUnknown(LoMemVT.getStoreSize());
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MLD->getPointerInfo(), MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
- Alignment, MLD->getAAInfo(), MLD->getRanges());
+ MLD->getPointerInfo(), MachineMemOperand::MOLoad, LoSize, Alignment,
+ MLD->getAAInfo(), MLD->getRanges());
Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, Offset, MaskLo, PassThruLo, LoMemVT,
MMO, MLD->getAddressingMode(), ExtType,
@@ -1627,12 +1732,18 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
// Generate hi masked load.
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
MLD->isExpandingLoad());
- unsigned HiOffset = LoMemVT.getStoreSize();
+ unsigned HiSize = MemoryLocation::getSizeOrUnknown(HiMemVT.getStoreSize());
+
+ MachinePointerInfo MPI;
+ if (LoMemVT.isScalableVector())
+ MPI = MachinePointerInfo(MLD->getPointerInfo().getAddrSpace());
+ else
+ MPI = MLD->getPointerInfo().getWithOffset(
+ LoMemVT.getStoreSize().getFixedSize());
MMO = DAG.getMachineFunction().getMachineMemOperand(
- MLD->getPointerInfo().getWithOffset(HiOffset),
- MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), Alignment,
- MLD->getAAInfo(), MLD->getRanges());
+ MPI, MachineMemOperand::MOLoad, HiSize, Alignment, MLD->getAAInfo(),
+ MLD->getRanges());
Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi,
HiMemVT, MMO, MLD->getAddressingMode(), ExtType,
@@ -1662,7 +1773,9 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
SDValue PassThru = MGT->getPassThru();
SDValue Index = MGT->getIndex();
SDValue Scale = MGT->getScale();
+ EVT MemoryVT = MGT->getMemoryVT();
Align Alignment = MGT->getOriginalAlign();
+ ISD::LoadExtType ExtType = MGT->getExtensionType();
// Split Mask operand
SDValue MaskLo, MaskHi;
@@ -1675,6 +1788,10 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
}
+ EVT LoMemVT, HiMemVT;
+ // Split MemoryVT
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
SDValue PassThruLo, PassThruHi;
if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector)
GetSplitVector(PassThru, PassThruLo, PassThruHi);
@@ -1693,12 +1810,12 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
MGT->getRanges());
SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale};
- Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo,
- MMO, MGT->getIndexType());
+ Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl, OpsLo,
+ MMO, MGT->getIndexType(), ExtType);
SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale};
- Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi,
- MMO, MGT->getIndexType());
+ Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl, OpsHi,
+ MMO, MGT->getIndexType(), ExtType);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -1786,8 +1903,8 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo,
// more effectively move in the right direction and prevent falling down
// to scalarization in many cases due to the input vector being split too
// far.
- if ((SrcVT.getVectorMinNumElements() & 1) == 0 &&
- SrcVT.getSizeInBits() * 2 < DestVT.getSizeInBits()) {
+ if (SrcVT.getVectorElementCount().isKnownEven() &&
+ SrcVT.getScalarSizeInBits() * 2 < DestVT.getScalarSizeInBits()) {
LLVMContext &Ctx = *DAG.getContext();
EVT NewSrcVT = SrcVT.widenIntegerVectorElementType(Ctx);
EVT SplitSrcVT = SrcVT.getHalfNumVectorElementsVT(Ctx);
@@ -1942,6 +2059,22 @@ void DAGTypeLegalizer::SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
ReplaceValueWith(SDValue(N, 1), Chain);
}
+void DAGTypeLegalizer::SplitVecRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT DstVTLo, DstVTHi;
+ std::tie(DstVTLo, DstVTHi) = DAG.GetSplitDestVTs(N->getValueType(0));
+ SDLoc dl(N);
+
+ SDValue SrcLo, SrcHi;
+ EVT SrcVT = N->getOperand(0).getValueType();
+ if (getTypeAction(SrcVT) == TargetLowering::TypeSplitVector)
+ GetSplitVector(N->getOperand(0), SrcLo, SrcHi);
+ else
+ std::tie(SrcLo, SrcHi) = DAG.SplitVectorOperand(N, 0);
+
+ Lo = DAG.getNode(N->getOpcode(), dl, DstVTLo, SrcLo, N->getOperand(1));
+ Hi = DAG.getNode(N->getOpcode(), dl, DstVTHi, SrcHi, N->getOperand(1));
+}
//===----------------------------------------------------------------------===//
// Operand Vector Splitting
@@ -1959,92 +2092,95 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
return false;
- if (!Res.getNode()) {
- switch (N->getOpcode()) {
- default:
+ switch (N->getOpcode()) {
+ default:
#ifndef NDEBUG
- dbgs() << "SplitVectorOperand Op #" << OpNo << ": ";
- N->dump(&DAG);
- dbgs() << "\n";
+ dbgs() << "SplitVectorOperand Op #" << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
#endif
- report_fatal_error("Do not know how to split this operator's "
- "operand!\n");
-
- case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break;
- case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break;
- case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
- case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
- case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
- case ISD::TRUNCATE:
+ report_fatal_error("Do not know how to split this operator's "
+ "operand!\n");
+
+ case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break;
+ case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
+ case ISD::INSERT_SUBVECTOR: Res = SplitVecOp_INSERT_SUBVECTOR(N, OpNo); break;
+ case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
+ case ISD::TRUNCATE:
+ Res = SplitVecOp_TruncateHelper(N);
+ break;
+ case ISD::STRICT_FP_ROUND:
+ case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
+ case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break;
+ case ISD::STORE:
+ Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
+ break;
+ case ISD::MSTORE:
+ Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo);
+ break;
+ case ISD::MSCATTER:
+ Res = SplitVecOp_MSCATTER(cast<MaskedScatterSDNode>(N), OpNo);
+ break;
+ case ISD::MGATHER:
+ Res = SplitVecOp_MGATHER(cast<MaskedGatherSDNode>(N), OpNo);
+ break;
+ case ISD::VSELECT:
+ Res = SplitVecOp_VSELECT(N, OpNo);
+ break;
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ if (N->getValueType(0).bitsLT(
+ N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType()))
Res = SplitVecOp_TruncateHelper(N);
- break;
- case ISD::STRICT_FP_ROUND:
- case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
- case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break;
- case ISD::STORE:
- Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
- break;
- case ISD::MSTORE:
- Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo);
- break;
- case ISD::MSCATTER:
- Res = SplitVecOp_MSCATTER(cast<MaskedScatterSDNode>(N), OpNo);
- break;
- case ISD::MGATHER:
- Res = SplitVecOp_MGATHER(cast<MaskedGatherSDNode>(N), OpNo);
- break;
- case ISD::VSELECT:
- Res = SplitVecOp_VSELECT(N, OpNo);
- break;
- case ISD::STRICT_SINT_TO_FP:
- case ISD::STRICT_UINT_TO_FP:
- case ISD::SINT_TO_FP:
- case ISD::UINT_TO_FP:
- if (N->getValueType(0).bitsLT(
- N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType()))
- Res = SplitVecOp_TruncateHelper(N);
- else
- Res = SplitVecOp_UnaryOp(N);
- break;
- case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT:
- case ISD::STRICT_FP_TO_SINT:
- case ISD::STRICT_FP_TO_UINT:
- case ISD::CTTZ:
- case ISD::CTLZ:
- case ISD::CTPOP:
- case ISD::STRICT_FP_EXTEND:
- case ISD::FP_EXTEND:
- case ISD::SIGN_EXTEND:
- case ISD::ZERO_EXTEND:
- case ISD::ANY_EXTEND:
- case ISD::FTRUNC:
- case ISD::FCANONICALIZE:
+ else
Res = SplitVecOp_UnaryOp(N);
- break;
+ break;
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ Res = SplitVecOp_FP_TO_XINT_SAT(N);
+ break;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
+ case ISD::STRICT_FP_EXTEND:
+ case ISD::FP_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ case ISD::FTRUNC:
+ Res = SplitVecOp_UnaryOp(N);
+ break;
- case ISD::ANY_EXTEND_VECTOR_INREG:
- case ISD::SIGN_EXTEND_VECTOR_INREG:
- case ISD::ZERO_EXTEND_VECTOR_INREG:
- Res = SplitVecOp_ExtVecInRegOp(N);
- break;
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ Res = SplitVecOp_ExtVecInRegOp(N);
+ break;
- case ISD::VECREDUCE_FADD:
- case ISD::VECREDUCE_FMUL:
- case ISD::VECREDUCE_ADD:
- case ISD::VECREDUCE_MUL:
- case ISD::VECREDUCE_AND:
- case ISD::VECREDUCE_OR:
- case ISD::VECREDUCE_XOR:
- case ISD::VECREDUCE_SMAX:
- case ISD::VECREDUCE_SMIN:
- case ISD::VECREDUCE_UMAX:
- case ISD::VECREDUCE_UMIN:
- case ISD::VECREDUCE_FMAX:
- case ISD::VECREDUCE_FMIN:
- Res = SplitVecOp_VECREDUCE(N, OpNo);
- break;
- }
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ Res = SplitVecOp_VECREDUCE(N, OpNo);
+ break;
+ case ISD::VECREDUCE_SEQ_FADD:
+ case ISD::VECREDUCE_SEQ_FMUL:
+ Res = SplitVecOp_VECREDUCE_SEQ(N);
+ break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -2112,36 +2248,35 @@ SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo) {
EVT LoOpVT, HiOpVT;
std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(VecVT);
- bool NoNaN = N->getFlags().hasNoNaNs();
- unsigned CombineOpc = 0;
- switch (N->getOpcode()) {
- case ISD::VECREDUCE_FADD: CombineOpc = ISD::FADD; break;
- case ISD::VECREDUCE_FMUL: CombineOpc = ISD::FMUL; break;
- case ISD::VECREDUCE_ADD: CombineOpc = ISD::ADD; break;
- case ISD::VECREDUCE_MUL: CombineOpc = ISD::MUL; break;
- case ISD::VECREDUCE_AND: CombineOpc = ISD::AND; break;
- case ISD::VECREDUCE_OR: CombineOpc = ISD::OR; break;
- case ISD::VECREDUCE_XOR: CombineOpc = ISD::XOR; break;
- case ISD::VECREDUCE_SMAX: CombineOpc = ISD::SMAX; break;
- case ISD::VECREDUCE_SMIN: CombineOpc = ISD::SMIN; break;
- case ISD::VECREDUCE_UMAX: CombineOpc = ISD::UMAX; break;
- case ISD::VECREDUCE_UMIN: CombineOpc = ISD::UMIN; break;
- case ISD::VECREDUCE_FMAX:
- CombineOpc = NoNaN ? ISD::FMAXNUM : ISD::FMAXIMUM;
- break;
- case ISD::VECREDUCE_FMIN:
- CombineOpc = NoNaN ? ISD::FMINNUM : ISD::FMINIMUM;
- break;
- default:
- llvm_unreachable("Unexpected reduce ISD node");
- }
-
// Use the appropriate scalar instruction on the split subvectors before
// reducing the now partially reduced smaller vector.
+ unsigned CombineOpc = ISD::getVecReduceBaseOpcode(N->getOpcode());
SDValue Partial = DAG.getNode(CombineOpc, dl, LoOpVT, Lo, Hi, N->getFlags());
return DAG.getNode(N->getOpcode(), dl, ResVT, Partial, N->getFlags());
}
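// Worked example (illustrative): VECREDUCE_ADD over <8 x i32> split into
// halves Lo and Hi becomes
//   %partial = add <4 x i32> %lo, %hi
//   %res     = vecreduce_add <4 x i32> %partial
// where ISD::getVecReduceBaseOpcode supplies the element-wise opcode (ADD
// here) and the original node's flags are propagated to both steps.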
+SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE_SEQ(SDNode *N) {
+ EVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ SDLoc dl(N);
+
+ SDValue AccOp = N->getOperand(0);
+ SDValue VecOp = N->getOperand(1);
+ SDNodeFlags Flags = N->getFlags();
+
+ EVT VecVT = VecOp.getValueType();
+ assert(VecVT.isVector() && "Can only split reduce vector operand");
+ GetSplitVector(VecOp, Lo, Hi);
+ EVT LoOpVT, HiOpVT;
+ std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(VecVT);
+
+ // Reduce low half.
+ SDValue Partial = DAG.getNode(N->getOpcode(), dl, ResVT, AccOp, Lo, Flags);
+
+ // Reduce high half, using low half result as initial value.
+ return DAG.getNode(N->getOpcode(), dl, ResVT, Partial, Hi, Flags);
+}
+
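// Worked example (illustrative): for a strictly-ordered FADD reduction with
// accumulator Acc and VecOp split into Lo = <a,b> and Hi = <c,d>:
//   Partial = VECREDUCE_SEQ_FADD(Acc, <a,b>)     == (Acc + a) + b
//   Result  = VECREDUCE_SEQ_FADD(Partial, <c,d>) == (((Acc + a) + b) + c) + d
// so the left-to-right evaluation order of the original reduction is
// preserved across the split.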
SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
// The result has a legal vector type, but the input needs splitting.
EVT ResVT = N->getValueType(0);
@@ -2191,9 +2326,36 @@ SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) {
JoinIntegers(Lo, Hi));
}
+SDValue DAGTypeLegalizer::SplitVecOp_INSERT_SUBVECTOR(SDNode *N,
+ unsigned OpNo) {
+ assert(OpNo == 1 && "Invalid OpNo; can only split SubVec.");
+ // We know that the result type is legal.
+ EVT ResVT = N->getValueType(0);
+
+ SDValue Vec = N->getOperand(0);
+ SDValue SubVec = N->getOperand(1);
+ SDValue Idx = N->getOperand(2);
+ SDLoc dl(N);
+
+ SDValue Lo, Hi;
+ GetSplitVector(SubVec, Lo, Hi);
+
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ uint64_t LoElts = Lo.getValueType().getVectorMinNumElements();
+
+ SDValue FirstInsertion =
+ DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, Lo, Idx);
+ SDValue SecondInsertion =
+ DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, FirstInsertion, Hi,
+ DAG.getVectorIdxConstant(IdxVal + LoElts, dl));
+
+ return SecondInsertion;
+}
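// Illustrative example: inserting a split <vscale x 4 x i32> SubVec into a
// <vscale x 8 x i32> Vec at index 0 becomes two chained insertions,
//   %t = insert_subvector %Vec, %SubVecLo, 0
//   %r = insert_subvector %t,   %SubVecHi, 2   ; 2 == LoElts (min count)
// with the second index offset by the minimum element count of the low half.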
+
SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
// We know that the extracted result type is legal.
EVT SubVT = N->getValueType(0);
+
SDValue Idx = N->getOperand(1);
SDLoc dl(N);
SDValue Lo, Hi;
@@ -2229,13 +2391,14 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
SDValue Lo, Hi;
GetSplitVector(Vec, Lo, Hi);
- uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+ uint64_t LoElts = Lo.getValueType().getVectorMinNumElements();
if (IdxVal < LoElts)
return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0);
- return SDValue(DAG.UpdateNodeOperands(N, Hi,
- DAG.getConstant(IdxVal - LoElts, SDLoc(N),
- Idx.getValueType())), 0);
+ else if (!Vec.getValueType().isScalableVector())
+ return SDValue(DAG.UpdateNodeOperands(N, Hi,
+ DAG.getConstant(IdxVal - LoElts, SDLoc(N),
+ Idx.getValueType())), 0);
}
// See if the target wants to custom expand this node.
@@ -2248,7 +2411,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
if (VecVT.getScalarSizeInBits() < 8) {
EltVT = MVT::i8;
VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
- VecVT.getVectorNumElements());
+ VecVT.getVectorElementCount());
Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec);
}
@@ -2278,7 +2441,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
return DAG.getExtLoad(
ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT,
- commonAlignment(SmallestAlign, EltVT.getSizeInBits() / 8));
+ commonAlignment(SmallestAlign, EltVT.getFixedSizeInBits() / 8));
}
SDValue DAGTypeLegalizer::SplitVecOp_ExtVecInRegOp(SDNode *N) {
@@ -2304,6 +2467,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
SDValue Mask = MGT->getMask();
SDValue PassThru = MGT->getPassThru();
Align Alignment = MGT->getOriginalAlign();
+ ISD::LoadExtType ExtType = MGT->getExtensionType();
SDValue MaskLo, MaskHi;
if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
@@ -2334,12 +2498,12 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
MGT->getRanges());
SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale};
- SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl,
- OpsLo, MMO, MGT->getIndexType());
+ SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl,
+ OpsLo, MMO, MGT->getIndexType(), ExtType);
SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale};
- SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl,
- OpsHi, MMO, MGT->getIndexType());
+ SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl,
+ OpsHi, MMO, MGT->getIndexType(), ExtType);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -2393,9 +2557,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
DAG.GetDependentSplitDestVTs(MemoryVT, DataLo.getValueType(), &HiIsEmpty);
SDValue Lo, Hi, Res;
+ unsigned LoSize = MemoryLocation::getSizeOrUnknown(LoMemVT.getStoreSize());
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- N->getPointerInfo(), MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
- Alignment, N->getAAInfo(), N->getRanges());
+ N->getPointerInfo(), MachineMemOperand::MOStore, LoSize, Alignment,
+ N->getAAInfo(), N->getRanges());
Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, Offset, MaskLo, LoMemVT, MMO,
N->getAddressingMode(), N->isTruncatingStore(),
@@ -2409,11 +2574,20 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
N->isCompressingStore());
- unsigned HiOffset = LoMemVT.getStoreSize();
+ MachinePointerInfo MPI;
+ if (LoMemVT.isScalableVector()) {
+ Alignment = commonAlignment(
+ Alignment, LoMemVT.getSizeInBits().getKnownMinSize() / 8);
+ MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace());
+ } else
+ MPI = N->getPointerInfo().getWithOffset(
+ LoMemVT.getStoreSize().getFixedSize());
+
+ unsigned HiSize = MemoryLocation::getSizeOrUnknown(HiMemVT.getStoreSize());
MMO = DAG.getMachineFunction().getMachineMemOperand(
- N->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOStore,
- HiMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges());
+ MPI, MachineMemOperand::MOStore, HiSize, Alignment, N->getAAInfo(),
+ N->getRanges());
Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO,
N->getAddressingMode(), N->isTruncatingStore(),
@@ -2435,11 +2609,15 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
SDValue Index = N->getIndex();
SDValue Scale = N->getScale();
SDValue Data = N->getValue();
+ EVT MemoryVT = N->getMemoryVT();
Align Alignment = N->getOriginalAlign();
SDLoc DL(N);
// Split all operands
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
SDValue DataLo, DataHi;
if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
// Split Data operand
@@ -2470,15 +2648,17 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges());
SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Scale};
- Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
- DL, OpsLo, MMO, N->getIndexType());
+ Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), LoMemVT,
+ DL, OpsLo, MMO, N->getIndexType(),
+ N->isTruncatingStore());
// The order of the Scatter operation after split is well defined. The "Hi"
// part comes after the "Lo". So these two operations should be chained one
// after another.
SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Scale};
- return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
- DL, OpsHi, MMO, N->getIndexType());
+ return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), HiMemVT,
+ DL, OpsHi, MMO, N->getIndexType(),
+ N->isTruncatingStore());
}
SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
@@ -2604,7 +2784,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
EVT::getFloatingPointVT(InElementSize/2) :
EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT,
- NumElements/2);
+ NumElements.divideCoefficientBy(2));
SDValue HalfLo;
SDValue HalfHi;
@@ -2683,7 +2863,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
EVT InVT = Lo.getValueType();
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
- InVT.getVectorNumElements());
+ InVT.getVectorElementCount());
if (N->isStrictFPOpcode()) {
Lo = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other },
@@ -2709,6 +2889,22 @@ SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) {
return DAG.UnrollVectorOp(N, N->getValueType(0).getVectorNumElements());
}
+SDValue DAGTypeLegalizer::SplitVecOp_FP_TO_XINT_SAT(SDNode *N) {
+ EVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ SDLoc dl(N);
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ EVT InVT = Lo.getValueType();
+
+ EVT NewResVT =
+ EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
+ InVT.getVectorElementCount());
+
+ Lo = DAG.getNode(N->getOpcode(), dl, NewResVT, Lo, N->getOperand(1));
+ Hi = DAG.getNode(N->getOpcode(), dl, NewResVT, Hi, N->getOperand(1));
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
+}
//===----------------------------------------------------------------------===//
// Result Vector Widening
@@ -2739,7 +2935,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
- case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::SPLAT_VECTOR:
+ case ISD::SCALAR_TO_VECTOR:
+ Res = WidenVecRes_ScalarOp(N);
+ break;
case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break;
case ISD::VSELECT:
case ISD::SELECT: Res = WidenVecRes_SELECT(N); break;
@@ -2764,6 +2963,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::OR:
case ISD::SUB:
case ISD::XOR:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
case ISD::FMINNUM:
case ISD::FMAXNUM:
case ISD::FMINIMUM:
@@ -2776,6 +2978,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SADDSAT:
case ISD::USUBSAT:
case ISD::SSUBSAT:
+ case ISD::SSHLSAT:
+ case ISD::USHLSAT:
+ case ISD::ROTL:
+ case ISD::ROTR:
Res = WidenVecRes_Binary(N);
break;
@@ -2824,12 +3030,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_POWI(N);
break;
- case ISD::SHL:
- case ISD::SRA:
- case ISD::SRL:
- Res = WidenVecRes_Shift(N);
- break;
-
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG:
@@ -2849,6 +3049,11 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_Convert(N);
break;
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ Res = WidenVecRes_FP_TO_XINT_SAT(N);
+ break;
+
case ISD::FABS:
case ISD::FCEIL:
case ISD::FCOS:
@@ -2896,6 +3101,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_Unary(N);
break;
case ISD::FMA:
+ case ISD::FSHL:
+ case ISD::FSHR:
Res = WidenVecRes_Ternary(N);
break;
}
@@ -3261,19 +3468,34 @@ SDValue DAGTypeLegalizer::WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo) {
}
SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
+ LLVMContext &Ctx = *DAG.getContext();
SDValue InOp = N->getOperand(0);
SDLoc DL(N);
- EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT WidenVT = TLI.getTypeToTransformTo(Ctx, N->getValueType(0));
unsigned WidenNumElts = WidenVT.getVectorNumElements();
EVT InVT = InOp.getValueType();
- EVT InEltVT = InVT.getVectorElementType();
- EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts);
unsigned Opcode = N->getOpcode();
- unsigned InVTNumElts = InVT.getVectorNumElements();
const SDNodeFlags Flags = N->getFlags();
+
+ // Handle the case of ZERO_EXTEND where the promoted InVT element size does
+ // not equal that of WidenVT.
+ if (N->getOpcode() == ISD::ZERO_EXTEND &&
+ getTypeAction(InVT) == TargetLowering::TypePromoteInteger &&
+ TLI.getTypeToTransformTo(Ctx, InVT).getScalarSizeInBits() !=
+ WidenVT.getScalarSizeInBits()) {
+ InOp = ZExtPromotedInteger(InOp);
+ InVT = InOp.getValueType();
+ if (WidenVT.getScalarSizeInBits() < InVT.getScalarSizeInBits())
+ Opcode = ISD::TRUNCATE;
+ }
+
+ EVT InEltVT = InVT.getVectorElementType();
+ EVT InWidenVT = EVT::getVectorVT(Ctx, InEltVT, WidenNumElts);
+ unsigned InVTNumElts = InVT.getVectorNumElements();
+
if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
InOp = GetWidenedVector(N->getOperand(0));
InVT = InOp.getValueType();
@@ -3341,6 +3563,27 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
return DAG.getBuildVector(WidenVT, DL, Ops);
}
+SDValue DAGTypeLegalizer::WidenVecRes_FP_TO_XINT_SAT(SDNode *N) {
+ SDLoc dl(N);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ ElementCount WidenNumElts = WidenVT.getVectorElementCount();
+
+ SDValue Src = N->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+
+ // Also widen the input.
+ if (getTypeAction(SrcVT) == TargetLowering::TypeWidenVector) {
+ Src = GetWidenedVector(Src);
+ SrcVT = Src.getValueType();
+ }
+
+  // If the input and output were not widened to the same size, give up and unroll.
+ if (WidenNumElts != SrcVT.getVectorElementCount())
+ return DAG.UnrollVectorOp(N, WidenNumElts.getKnownMinValue());
+
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, Src, N->getOperand(1));
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
SDValue InOp = N->getOperand(1);
SDLoc DL(N);
@@ -3447,25 +3690,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp);
}
-SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) {
- EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue InOp = GetWidenedVector(N->getOperand(0));
- SDValue ShOp = N->getOperand(1);
-
- EVT ShVT = ShOp.getValueType();
- if (getTypeAction(ShVT) == TargetLowering::TypeWidenVector) {
- ShOp = GetWidenedVector(ShOp);
- ShVT = ShOp.getValueType();
- }
- EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(),
- ShVT.getVectorElementType(),
- WidenVT.getVectorNumElements());
- if (ShVT != ShWidenVT)
- ShOp = ModifyToType(ShOp, ShWidenVT);
-
- return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp);
-}
-
SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
// Unary op widening.
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
@@ -3820,9 +4044,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
Index = ModifyToType(Index, WideIndexVT);
SDValue Ops[] = { N->getChain(), PassThru, Mask, N->getBasePtr(), Index,
Scale };
+
+  // Widen the memory type to match the widened element count.
+ EVT WideMemVT = EVT::getVectorVT(*DAG.getContext(),
+ N->getMemoryVT().getScalarType(), NumElts);
SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other),
- N->getMemoryVT(), dl, Ops,
- N->getMemOperand(), N->getIndexType());
+ WideMemVT, dl, Ops, N->getMemOperand(),
+ N->getIndexType(), N->getExtensionType());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
@@ -3830,10 +4058,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
return Res;
}
-SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+SDValue DAGTypeLegalizer::WidenVecRes_ScalarOp(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N),
- WidenVT, N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, N->getOperand(0));
}
// Return true is this is a SETCC node or a strict version of it.
@@ -3953,11 +4180,11 @@ SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT,
return Mask;
}
-// This method tries to handle VSELECT and its mask by legalizing operands
-// (which may require widening) and if needed adjusting the mask vector type
-// to match that of the VSELECT. Without it, many cases end up with
-// scalarization of the SETCC, with many unnecessary instructions.
-SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
+// This method tries to handle some special cases for the vselect mask
+// and, if needed, adjusts the mask vector type to match that of the VSELECT.
+// Without it, many cases end up with scalarization of the SETCC, with many
+// unnecessary instructions.
+SDValue DAGTypeLegalizer::WidenVSELECTMask(SDNode *N) {
LLVMContext &Ctx = *DAG.getContext();
SDValue Cond = N->getOperand(0);
@@ -4004,14 +4231,9 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
return SDValue();
}
- // Get the VT and operands for VSELECT, and widen if needed.
- SDValue VSelOp1 = N->getOperand(1);
- SDValue VSelOp2 = N->getOperand(2);
- if (getTypeAction(VSelVT) == TargetLowering::TypeWidenVector) {
+ // Widen the vselect result type if needed.
+ if (getTypeAction(VSelVT) == TargetLowering::TypeWidenVector)
VSelVT = TLI.getTypeToTransformTo(Ctx, VSelVT);
- VSelOp1 = GetWidenedVector(VSelOp1);
- VSelOp2 = GetWidenedVector(VSelOp2);
- }
// The mask of the VSELECT should have integer elements.
EVT ToMaskVT = VSelVT;
@@ -4060,7 +4282,7 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
} else
return SDValue();
- return DAG.getNode(ISD::VSELECT, SDLoc(N), VSelVT, Mask, VSelOp1, VSelOp2);
+ return Mask;
}
SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
@@ -4070,8 +4292,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
SDValue Cond1 = N->getOperand(0);
EVT CondVT = Cond1.getValueType();
if (CondVT.isVector()) {
- if (SDValue Res = WidenVSELECTAndMask(N))
- return Res;
+ if (SDValue WideCond = WidenVSELECTMask(N)) {
+ SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(2));
+ assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
+ return DAG.getNode(N->getOpcode(), SDLoc(N),
+ WidenVT, WideCond, InOp1, InOp2);
+ }
EVT CondEltVT = CondVT.getVectorElementType();
EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(),
@@ -4278,6 +4505,11 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
Res = WidenVecOp_Convert(N);
break;
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ Res = WidenVecOp_FP_TO_XINT_SAT(N);
+ break;
+
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_ADD:
@@ -4293,6 +4525,10 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VECREDUCE_FMIN:
Res = WidenVecOp_VECREDUCE(N);
break;
+ case ISD::VECREDUCE_SEQ_FADD:
+ case ISD::VECREDUCE_SEQ_FMUL:
+ Res = WidenVecOp_VECREDUCE_SEQ(N);
+ break;
}
// If Res is null, the sub-method took care of registering the result.
@@ -4447,6 +4683,28 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
return DAG.getBuildVector(VT, dl, Ops);
}
+SDValue DAGTypeLegalizer::WidenVecOp_FP_TO_XINT_SAT(SDNode *N) {
+ EVT DstVT = N->getValueType(0);
+ SDValue Src = GetWidenedVector(N->getOperand(0));
+ EVT SrcVT = Src.getValueType();
+ ElementCount WideNumElts = SrcVT.getVectorElementCount();
+ SDLoc dl(N);
+
+  // See if a widened result type would be legal; if so, widen the node.
+ EVT WideDstVT = EVT::getVectorVT(*DAG.getContext(),
+ DstVT.getVectorElementType(), WideNumElts);
+ if (TLI.isTypeLegal(WideDstVT)) {
+ SDValue Res =
+ DAG.getNode(N->getOpcode(), dl, WideDstVT, Src, N->getOperand(1));
+ return DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, DstVT, Res,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+
+ // Give up and unroll.
+ return DAG.UnrollVectorOp(N);
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue InOp = GetWidenedVector(N->getOperand(0));
@@ -4547,11 +4805,11 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
if (!ST->getMemoryVT().getScalarType().isByteSized())
return TLI.scalarizeVectorStore(ST, DAG);
- SmallVector<SDValue, 16> StChain;
if (ST->isTruncatingStore())
- GenWidenVectorTruncStores(StChain, ST);
- else
- GenWidenVectorStores(StChain, ST);
+ return TLI.scalarizeVectorStore(ST, DAG);
+
+ SmallVector<SDValue, 16> StChain;
+ GenWidenVectorStores(StChain, ST);
if (StChain.size() == 1)
return StChain[0];
@@ -4613,7 +4871,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_MGATHER(SDNode *N, unsigned OpNo) {
SDValue Ops[] = {MG->getChain(), DataOp, Mask, MG->getBasePtr(), Index,
Scale};
SDValue Res = DAG.getMaskedGather(MG->getVTList(), MG->getMemoryVT(), dl, Ops,
- MG->getMemOperand(), MG->getIndexType());
+ MG->getMemOperand(), MG->getIndexType(),
+ MG->getExtensionType());
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
ReplaceValueWith(SDValue(N, 0), Res.getValue(0));
return SDValue();
@@ -4625,6 +4884,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
SDValue Mask = MSC->getMask();
SDValue Index = MSC->getIndex();
SDValue Scale = MSC->getScale();
+ EVT WideMemVT = MSC->getMemoryVT();
if (OpNo == 1) {
DataOp = GetWidenedVector(DataOp);
@@ -4641,6 +4901,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
MaskVT.getVectorElementType(), NumElts);
Mask = ModifyToType(Mask, WideMaskVT, true);
+
+ // Widen the MemoryType
+    // Widen the memory type to match the widened element count.
+ MSC->getMemoryVT().getScalarType(), NumElts);
} else if (OpNo == 4) {
// Just widen the index. It's allowed to have extra elements.
Index = GetWidenedVector(Index);
@@ -4649,9 +4913,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
SDValue Ops[] = {MSC->getChain(), DataOp, Mask, MSC->getBasePtr(), Index,
Scale};
- return DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
- MSC->getMemoryVT(), SDLoc(N), Ops,
- MSC->getMemOperand(), MSC->getIndexType());
+ return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), WideMemVT, SDLoc(N),
+ Ops, MSC->getMemOperand(), MSC->getIndexType(),
+ MSC->isTruncatingStore());
}
SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
@@ -4730,45 +4994,12 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
EVT OrigVT = N->getOperand(0).getValueType();
EVT WideVT = Op.getValueType();
EVT ElemVT = OrigVT.getVectorElementType();
+ SDNodeFlags Flags = N->getFlags();
- SDValue NeutralElem;
- switch (N->getOpcode()) {
- case ISD::VECREDUCE_ADD:
- case ISD::VECREDUCE_OR:
- case ISD::VECREDUCE_XOR:
- case ISD::VECREDUCE_UMAX:
- NeutralElem = DAG.getConstant(0, dl, ElemVT);
- break;
- case ISD::VECREDUCE_MUL:
- NeutralElem = DAG.getConstant(1, dl, ElemVT);
- break;
- case ISD::VECREDUCE_AND:
- case ISD::VECREDUCE_UMIN:
- NeutralElem = DAG.getAllOnesConstant(dl, ElemVT);
- break;
- case ISD::VECREDUCE_SMAX:
- NeutralElem = DAG.getConstant(
- APInt::getSignedMinValue(ElemVT.getSizeInBits()), dl, ElemVT);
- break;
- case ISD::VECREDUCE_SMIN:
- NeutralElem = DAG.getConstant(
- APInt::getSignedMaxValue(ElemVT.getSizeInBits()), dl, ElemVT);
- break;
- case ISD::VECREDUCE_FADD:
- NeutralElem = DAG.getConstantFP(0.0, dl, ElemVT);
- break;
- case ISD::VECREDUCE_FMUL:
- NeutralElem = DAG.getConstantFP(1.0, dl, ElemVT);
- break;
- case ISD::VECREDUCE_FMAX:
- NeutralElem = DAG.getConstantFP(
- -std::numeric_limits<double>::infinity(), dl, ElemVT);
- break;
- case ISD::VECREDUCE_FMIN:
- NeutralElem = DAG.getConstantFP(
- std::numeric_limits<double>::infinity(), dl, ElemVT);
- break;
- }
+ unsigned Opc = N->getOpcode();
+ unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Opc);
+ SDValue NeutralElem = DAG.getNeutralElement(BaseOpc, dl, ElemVT, Flags);
+ assert(NeutralElem && "Neutral element must exist");
// Pad the vector with the neutral element.
unsigned OrigElts = OrigVT.getVectorNumElements();
@@ -4777,7 +5008,32 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem,
DAG.getVectorIdxConstant(Idx, dl));
- return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Op, N->getFlags());
+ return DAG.getNode(Opc, dl, N->getValueType(0), Op, Flags);
+}
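// Illustrative example: widening a VECREDUCE_ADD over <3 x i32> to <4 x i32>
// pads the extra lane with the neutral element 0, so the widened reduction
// computes the same sum; for VECREDUCE_UMIN the padding is the all-ones
// (maximum unsigned) value instead, as returned by DAG.getNeutralElement for
// the base opcode.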
+
+SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE_SEQ(SDNode *N) {
+ SDLoc dl(N);
+ SDValue AccOp = N->getOperand(0);
+ SDValue VecOp = N->getOperand(1);
+ SDValue Op = GetWidenedVector(VecOp);
+
+ EVT OrigVT = VecOp.getValueType();
+ EVT WideVT = Op.getValueType();
+ EVT ElemVT = OrigVT.getVectorElementType();
+ SDNodeFlags Flags = N->getFlags();
+
+ unsigned Opc = N->getOpcode();
+ unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Opc);
+ SDValue NeutralElem = DAG.getNeutralElement(BaseOpc, dl, ElemVT, Flags);
+
+ // Pad the vector with the neutral element.
+ unsigned OrigElts = OrigVT.getVectorNumElements();
+ unsigned WideElts = WideVT.getVectorNumElements();
+ for (unsigned Idx = OrigElts; Idx < WideElts; Idx++)
+ Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem,
+ DAG.getVectorIdxConstant(Idx, dl));
+
+ return DAG.getNode(Opc, dl, N->getValueType(0), AccOp, Op, Flags);
}
SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) {
@@ -4820,7 +5076,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
// If we have one element to load/store, return it.
EVT RetVT = WidenEltVT;
- if (Width == WidenEltWidth)
+ if (!Scalable && Width == WidenEltWidth)
return RetVT;
// See if there is larger legal integer than the element type to load/store.
@@ -4866,11 +5122,14 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
isPowerOf2_32(WidenWidth / MemVTWidth) &&
(MemVTWidth <= Width ||
(Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
- if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT)
+ if (RetVT.getFixedSizeInBits() < MemVTWidth || MemVT == WidenVT)
return MemVT;
}
}
+ if (Scalable)
+ report_fatal_error("Using element-wise loads and stores for widening "
+ "operations is not supported for scalable vectors");
return RetVT;
}
@@ -4913,10 +5172,10 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
// element type or scalar loads and then recombines it to the widen vector
// type.
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
- unsigned WidenWidth = WidenVT.getSizeInBits();
EVT LdVT = LD->getMemoryVT();
SDLoc dl(LD);
assert(LdVT.isVector() && WidenVT.isVector());
+ assert(LdVT.isScalableVector() == WidenVT.isScalableVector());
assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
// Load information
@@ -4925,23 +5184,25 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
AAMDNodes AAInfo = LD->getAAInfo();
- int LdWidth = LdVT.getSizeInBits();
- int WidthDiff = WidenWidth - LdWidth;
+ TypeSize LdWidth = LdVT.getSizeInBits();
+ TypeSize WidenWidth = WidenVT.getSizeInBits();
+ TypeSize WidthDiff = WidenWidth - LdWidth;
// Allow wider loads if they are sufficiently aligned to avoid memory faults
// and if the original load is simple.
unsigned LdAlign = (!LD->isSimple()) ? 0 : LD->getAlignment();
// Find the vector type that can load from.
- EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
- int NewVTWidth = NewVT.getSizeInBits();
+ EVT NewVT = FindMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign,
+ WidthDiff.getKnownMinSize());
+ TypeSize NewVTWidth = NewVT.getSizeInBits();
SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
LD->getOriginalAlign(), MMOFlags, AAInfo);
LdChain.push_back(LdOp.getValue(1));
// Check if we can load the element with one instruction.
- if (LdWidth <= NewVTWidth) {
+ if (TypeSize::isKnownLE(LdWidth, NewVTWidth)) {
if (!NewVT.isVector()) {
- unsigned NumElts = WidenWidth / NewVTWidth;
+ unsigned NumElts = WidenWidth.getFixedSize() / NewVTWidth.getFixedSize();
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
@@ -4949,8 +5210,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
if (NewVT == WidenVT)
return LdOp;
- assert(WidenWidth % NewVTWidth == 0);
- unsigned NumConcat = WidenWidth / NewVTWidth;
+ // TODO: We don't currently have any tests that exercise this code path.
+ assert(WidenWidth.getFixedSize() % NewVTWidth.getFixedSize() == 0);
+ unsigned NumConcat = WidenWidth.getFixedSize() / NewVTWidth.getFixedSize();
SmallVector<SDValue, 16> ConcatOps(NumConcat);
SDValue UndefVal = DAG.getUNDEF(NewVT);
ConcatOps[0] = LdOp;
@@ -4963,35 +5225,30 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
SmallVector<SDValue, 16> LdOps;
LdOps.push_back(LdOp);
- LdWidth -= NewVTWidth;
- unsigned Offset = 0;
-
- while (LdWidth > 0) {
- unsigned Increment = NewVTWidth / 8;
- Offset += Increment;
- BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment);
+ uint64_t ScaledOffset = 0;
+ MachinePointerInfo MPI = LD->getPointerInfo();
+ do {
+ LdWidth -= NewVTWidth;
+ IncrementPointer(cast<LoadSDNode>(LdOp), NewVT, MPI, BasePtr,
+ &ScaledOffset);
- SDValue L;
- if (LdWidth < NewVTWidth) {
+ if (TypeSize::isKnownLT(LdWidth, NewVTWidth)) {
// The current type we are using is too large. Find a better size.
- NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
+ NewVT = FindMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign,
+ WidthDiff.getKnownMinSize());
NewVTWidth = NewVT.getSizeInBits();
- L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
- LD->getPointerInfo().getWithOffset(Offset),
- LD->getOriginalAlign(), MMOFlags, AAInfo);
- LdChain.push_back(L.getValue(1));
- } else {
- L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
- LD->getPointerInfo().getWithOffset(Offset),
- LD->getOriginalAlign(), MMOFlags, AAInfo);
- LdChain.push_back(L.getValue(1));
}
+ Align NewAlign = ScaledOffset == 0
+ ? LD->getOriginalAlign()
+ : commonAlignment(LD->getAlign(), ScaledOffset);
+ SDValue L =
+ DAG.getLoad(NewVT, dl, Chain, BasePtr, MPI, NewAlign, MMOFlags, AAInfo);
+ LdChain.push_back(L.getValue(1));
+
LdOps.push_back(L);
LdOp = L;
-
- LdWidth -= NewVTWidth;
- }
+ } while (TypeSize::isKnownGT(LdWidth, NewVTWidth));
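// Illustrative sequence (target-dependent; assumes i32, i16 and i8 are
// legal): widening a 56-bit v7i8 load to v8i8 might first load an i32,
// leaving 24 bits, then re-query FindMemType for an i16 and finally an i8,
// chaining the partial loads for recombination below. For scalable vectors
// IncrementPointer advances the base pointer by a vscale-scaled offset
// rather than a fixed byte increment.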
// Build the vector from the load operations.
unsigned End = LdOps.size();
@@ -5015,13 +5272,18 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
}
ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i + 1, End);
}
+
ConcatOps[--Idx] = LdOps[i];
for (--i; i >= 0; --i) {
EVT NewLdTy = LdOps[i].getValueType();
if (NewLdTy != LdTy) {
// Create a larger vector.
- unsigned NumOps = NewLdTy.getSizeInBits() / LdTy.getSizeInBits();
- assert(NewLdTy.getSizeInBits() % LdTy.getSizeInBits() == 0);
+ TypeSize LdTySize = LdTy.getSizeInBits();
+ TypeSize NewLdTySize = NewLdTy.getSizeInBits();
+ assert(NewLdTySize.isScalable() == LdTySize.isScalable() &&
+ NewLdTySize.isKnownMultipleOf(LdTySize.getKnownMinSize()));
+ unsigned NumOps =
+ NewLdTySize.getKnownMinSize() / LdTySize.getKnownMinSize();
SmallVector<SDValue, 16> WidenOps(NumOps);
unsigned j = 0;
for (; j != End-Idx; ++j)
@@ -5042,7 +5304,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
makeArrayRef(&ConcatOps[Idx], End - Idx));
// We need to fill the rest with undefs to build the vector.
- unsigned NumOps = WidenWidth / LdTy.getSizeInBits();
+ unsigned NumOps =
+ WidenWidth.getKnownMinSize() / LdTy.getSizeInBits().getKnownMinSize();
SmallVector<SDValue, 16> WidenOps(NumOps);
SDValue UndefVal = DAG.getUNDEF(LdTy);
{
@@ -5065,6 +5328,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
EVT LdVT = LD->getMemoryVT();
SDLoc dl(LD);
assert(LdVT.isVector() && WidenVT.isVector());
+ assert(LdVT.isScalableVector() == WidenVT.isScalableVector());
// Load information
SDValue Chain = LD->getChain();
@@ -5072,6 +5336,10 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
AAMDNodes AAInfo = LD->getAAInfo();
+ if (LdVT.isScalableVector())
+    report_fatal_error("Generating widened scalable extending vector loads is "
+ "not yet supported");
+
EVT EltVT = WidenVT.getVectorElementType();
EVT LdEltVT = LdVT.getVectorElementType();
unsigned NumElts = LdVT.getVectorNumElements();
@@ -5086,7 +5354,8 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
LdChain.push_back(Ops[0].getValue(1));
unsigned i = 0, Offset = Increment;
for (i=1; i < NumElts; ++i, Offset += Increment) {
- SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset);
+ SDValue NewBasePtr =
+ DAG.getObjectPtrOffset(dl, BasePtr, TypeSize::Fixed(Offset));
Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
LD->getOriginalAlign(), MMOFlags, AAInfo);
@@ -5114,99 +5383,66 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
SDLoc dl(ST);
EVT StVT = ST->getMemoryVT();
- unsigned StWidth = StVT.getSizeInBits();
+ TypeSize StWidth = StVT.getSizeInBits();
EVT ValVT = ValOp.getValueType();
- unsigned ValWidth = ValVT.getSizeInBits();
+ TypeSize ValWidth = ValVT.getSizeInBits();
EVT ValEltVT = ValVT.getVectorElementType();
- unsigned ValEltWidth = ValEltVT.getSizeInBits();
+ unsigned ValEltWidth = ValEltVT.getFixedSizeInBits();
assert(StVT.getVectorElementType() == ValEltVT);
+ assert(StVT.isScalableVector() == ValVT.isScalableVector() &&
+ "Mismatch between store and value types");
int Idx = 0; // current index to store
- unsigned Offset = 0; // offset from base to store
- while (StWidth != 0) {
+
+ MachinePointerInfo MPI = ST->getPointerInfo();
+ uint64_t ScaledOffset = 0;
+ while (StWidth.isNonZero()) {
// Find the largest vector type we can store with.
- EVT NewVT = FindMemType(DAG, TLI, StWidth, ValVT);
- unsigned NewVTWidth = NewVT.getSizeInBits();
- unsigned Increment = NewVTWidth / 8;
+ EVT NewVT = FindMemType(DAG, TLI, StWidth.getKnownMinSize(), ValVT);
+ TypeSize NewVTWidth = NewVT.getSizeInBits();
+
if (NewVT.isVector()) {
- unsigned NumVTElts = NewVT.getVectorNumElements();
+ unsigned NumVTElts = NewVT.getVectorMinNumElements();
do {
+ Align NewAlign = ScaledOffset == 0
+ ? ST->getOriginalAlign()
+ : commonAlignment(ST->getAlign(), ScaledOffset);
SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
DAG.getVectorIdxConstant(Idx, dl));
- StChain.push_back(DAG.getStore(
- Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset),
- ST->getOriginalAlign(), MMOFlags, AAInfo));
+ SDValue PartStore = DAG.getStore(Chain, dl, EOp, BasePtr, MPI, NewAlign,
+ MMOFlags, AAInfo);
+ StChain.push_back(PartStore);
+
StWidth -= NewVTWidth;
- Offset += Increment;
Idx += NumVTElts;
- BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment);
- } while (StWidth != 0 && StWidth >= NewVTWidth);
+ IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr,
+ &ScaledOffset);
+ } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth));
} else {
// Cast the vector to the scalar type we can store.
- unsigned NumElts = ValWidth / NewVTWidth;
+ unsigned NumElts = ValWidth.getFixedSize() / NewVTWidth.getFixedSize();
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp);
// Readjust index position based on new vector type.
- Idx = Idx * ValEltWidth / NewVTWidth;
+ Idx = Idx * ValEltWidth / NewVTWidth.getFixedSize();
do {
SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
DAG.getVectorIdxConstant(Idx++, dl));
- StChain.push_back(DAG.getStore(
- Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset),
- ST->getOriginalAlign(), MMOFlags, AAInfo));
+ SDValue PartStore =
+ DAG.getStore(Chain, dl, EOp, BasePtr, MPI, ST->getOriginalAlign(),
+ MMOFlags, AAInfo);
+ StChain.push_back(PartStore);
+
StWidth -= NewVTWidth;
- Offset += Increment;
- BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment);
- } while (StWidth != 0 && StWidth >= NewVTWidth);
+ IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr);
+ } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth));
// Restore the index to be relative to the original widened element type.
- Idx = Idx * NewVTWidth / ValEltWidth;
+ Idx = Idx * NewVTWidth.getFixedSize() / ValEltWidth;
}
}
}
-void
-DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
- StoreSDNode *ST) {
- // For extension loads, it may not be more efficient to truncate the vector
- // and then store it. Instead, we extract each element and then store it.
- SDValue Chain = ST->getChain();
- SDValue BasePtr = ST->getBasePtr();
- MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
- AAMDNodes AAInfo = ST->getAAInfo();
- SDValue ValOp = GetWidenedVector(ST->getValue());
- SDLoc dl(ST);
-
- EVT StVT = ST->getMemoryVT();
- EVT ValVT = ValOp.getValueType();
-
- // It must be true that the wide vector type is bigger than where we need to
- // store.
- assert(StVT.isVector() && ValOp.getValueType().isVector());
- assert(StVT.bitsLT(ValOp.getValueType()));
-
- // For truncating stores, we can not play the tricks of chopping legal vector
- // types and bitcast it to the right type. Instead, we unroll the store.
- EVT StEltVT = StVT.getVectorElementType();
- EVT ValEltVT = ValVT.getVectorElementType();
- unsigned Increment = ValEltVT.getSizeInBits() / 8;
- unsigned NumElts = StVT.getVectorNumElements();
- SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
- DAG.getVectorIdxConstant(0, dl));
- StChain.push_back(
- DAG.getTruncStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo(), StEltVT,
- ST->getOriginalAlign(), MMOFlags, AAInfo));
- unsigned Offset = Increment;
- for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
- SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset);
- SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
- DAG.getVectorIdxConstant(0, dl));
- StChain.push_back(DAG.getTruncStore(
- Chain, dl, EOp, NewBasePtr, ST->getPointerInfo().getWithOffset(Offset),
- StEltVT, ST->getOriginalAlign(), MMOFlags, AAInfo));
- }
-}
-
/// Modifies a vector input (widen or narrows) to a vector of NVT. The
/// input vector must have the same element type as NVT.
/// FillWithZeroes specifies that the vector should be widened with zeroes.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 2902c96c7658..0022e5ec31f0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -760,7 +760,7 @@ void ScheduleDAGLinearize::Schedule() {
MachineBasicBlock*
ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
- InstrEmitter Emitter(BB, InsertPos);
+ InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos);
DenseMap<SDValue, Register> VRBaseMap;
LLVM_DEBUG({ dbgs() << "\n*** Final schedule ***\n"; });
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 72e68a5045c6..7a5e8ac6075e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -1838,13 +1838,16 @@ protected:
template<class SF>
static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) {
- std::vector<SUnit *>::iterator Best = Q.begin();
- for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I)
- if (Picker(*Best, *I))
- Best = I;
- SUnit *V = *Best;
- if (Best != std::prev(Q.end()))
- std::swap(*Best, Q.back());
+ unsigned BestIdx = 0;
+ // Only compute the cost for the first 1000 items in the queue, to avoid
+ // excessive compile-times for very large queues.
+ for (unsigned I = 1, E = std::min(Q.size(), (decltype(Q.size()))1000); I != E;
+ I++)
+ if (Picker(Q[BestIdx], Q[I]))
+ BestIdx = I;
+ SUnit *V = Q[BestIdx];
+ if (BestIdx + 1 != Q.size())
+ std::swap(Q[BestIdx], Q.back());
Q.pop_back();
return V;
}
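// Cost sketch (illustrative): popping all N ready SUnits previously cost up
// to sum_{k=1..N} k = O(N^2) comparisons; with the cap it is
// O(N * min(N, 1000)), trading an exact best pick for bounded compile time
// when more than 1000 nodes are ready at once.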
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index ce20d506586f..debfdda90e1e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -125,8 +125,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
PhysReg = Reg;
} else if (Def->isMachineOpcode()) {
const MCInstrDesc &II = TII->get(Def->getMachineOpcode());
- if (ResNo >= II.getNumDefs() &&
- II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg)
+ if (ResNo >= II.getNumDefs() && II.hasImplicitDefOfPhysReg(Reg))
PhysReg = Reg;
}
@@ -173,7 +172,7 @@ static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
// Don't add glue to something that already has a glue value.
if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return false;
- SmallVector<EVT, 4> VTs(N->value_begin(), N->value_end());
+ SmallVector<EVT, 4> VTs(N->values());
if (AddGlue)
VTs.push_back(MVT::Glue);
@@ -830,7 +829,7 @@ EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, Register> &VRBaseMap,
/// not necessarily refer to returned BB. The emitter may split blocks.
MachineBasicBlock *ScheduleDAGSDNodes::
EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
- InstrEmitter Emitter(BB, InsertPos);
+ InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos);
DenseMap<SDValue, Register> VRBaseMap;
DenseMap<SUnit*, Register> CopyVRBaseMap;
SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders;
@@ -1034,7 +1033,29 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
}
InsertPos = Emitter.getInsertPos();
- return Emitter.getBlock();
+ // In some cases, DBG_VALUEs might be inserted after the first terminator,
+ // which results in an invalid MBB. If that happens, move the DBG_VALUEs
+ // before the first terminator.
+ MachineBasicBlock *InsertBB = Emitter.getBlock();
+ auto FirstTerm = InsertBB->getFirstTerminator();
+ if (FirstTerm != InsertBB->end()) {
+ assert(!FirstTerm->isDebugValue() &&
+ "first terminator cannot be a debug value");
+ for (MachineInstr &MI : make_early_inc_range(
+ make_range(std::next(FirstTerm), InsertBB->end()))) {
+ if (!MI.isDebugValue())
+ continue;
+
+ if (&MI == InsertPos)
+ InsertPos = std::prev(InsertPos->getIterator());
+
+      // The DBG_VALUE was referencing a value produced by a terminator;
+      // since the DBG_VALUE is being moved, that value must be invalidated.
+ MI.getOperand(0).ChangeToRegister(0, false);
+ MI.moveBefore(&*FirstTerm);
+ }
+ }
+ return InsertBB;
}
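// Sketch of the fixup (hypothetical MIR; opcode and register names are
// illustrative only):
//   before:   JMP_1 %bb.2
//             DBG_VALUE %1, $noreg, !"x", !DIExpression()
//   after:    DBG_VALUE $noreg, $noreg, !"x", !DIExpression()
//             JMP_1 %bb.2
// The debug location survives the move, but its value operand is dropped
// because the referenced value is not available before the terminator.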
/// Return the basic block label.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 592c09c10fb0..2090762e2ff4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -28,6 +28,7 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -138,6 +139,15 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT,
//===----------------------------------------------------------------------===//
bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) {
+ if (N->getOpcode() == ISD::SPLAT_VECTOR) {
+ unsigned EltSize =
+ N->getValueType(0).getVectorElementType().getSizeInBits();
+ if (auto *Op0 = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ SplatVal = Op0->getAPIntValue().truncOrSelf(EltSize);
+ return true;
+ }
+ }
+
auto *BV = dyn_cast<BuildVectorSDNode>(N);
if (!BV)
return false;
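// Illustrative example: a SPLAT_VECTOR of <vscale x 4 x i32> whose scalar
// operand is a ConstantSDNode holding an i64 yields the 32-bit splat value
// via truncOrSelf(32); BUILD_VECTOR-based splats are still handled by the
// existing code below.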
@@ -154,11 +164,16 @@ bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) {
// FIXME: AllOnes and AllZeros duplicate a lot of code. Could these be
// specializations of the more general isConstantSplatVector()?
-bool ISD::isBuildVectorAllOnes(const SDNode *N) {
+bool ISD::isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly) {
// Look through a bit convert.
while (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0).getNode();
+ if (!BuildVectorOnly && N->getOpcode() == ISD::SPLAT_VECTOR) {
+ APInt SplatVal;
+ return isConstantSplatVector(N, SplatVal) && SplatVal.isAllOnesValue();
+ }
+
if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
unsigned i = 0, e = N->getNumOperands();
@@ -198,11 +213,16 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) {
return true;
}
-bool ISD::isBuildVectorAllZeros(const SDNode *N) {
+bool ISD::isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly) {
// Look through a bit convert.
while (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0).getNode();
+ if (!BuildVectorOnly && N->getOpcode() == ISD::SPLAT_VECTOR) {
+ APInt SplatVal;
+ return isConstantSplatVector(N, SplatVal) && SplatVal.isNullValue();
+ }
+
if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
bool IsAllUndef = true;
@@ -235,6 +255,14 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) {
return true;
}
+bool ISD::isBuildVectorAllOnes(const SDNode *N) {
+ return isConstantSplatVectorAllOnes(N, /*BuildVectorOnly*/ true);
+}
+
+bool ISD::isBuildVectorAllZeros(const SDNode *N) {
+ return isConstantSplatVectorAllZeros(N, /*BuildVectorOnly*/ true);
+}
+
bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) {
if (N->getOpcode() != ISD::BUILD_VECTOR)
return false;
@@ -278,7 +306,8 @@ bool ISD::matchUnaryPredicate(SDValue Op,
return Match(Cst);
// FIXME: Add support for vector UNDEF cases?
- if (ISD::BUILD_VECTOR != Op.getOpcode())
+ if (ISD::BUILD_VECTOR != Op.getOpcode() &&
+ ISD::SPLAT_VECTOR != Op.getOpcode())
return false;
EVT SVT = Op.getValueType().getScalarType();
@@ -332,6 +361,76 @@ bool ISD::matchBinaryPredicate(
return true;
}
+ISD::NodeType ISD::getVecReduceBaseOpcode(unsigned VecReduceOpcode) {
+ switch (VecReduceOpcode) {
+ default:
+ llvm_unreachable("Expected VECREDUCE opcode");
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_SEQ_FADD:
+ return ISD::FADD;
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_SEQ_FMUL:
+ return ISD::FMUL;
+ case ISD::VECREDUCE_ADD:
+ return ISD::ADD;
+ case ISD::VECREDUCE_MUL:
+ return ISD::MUL;
+ case ISD::VECREDUCE_AND:
+ return ISD::AND;
+ case ISD::VECREDUCE_OR:
+ return ISD::OR;
+ case ISD::VECREDUCE_XOR:
+ return ISD::XOR;
+ case ISD::VECREDUCE_SMAX:
+ return ISD::SMAX;
+ case ISD::VECREDUCE_SMIN:
+ return ISD::SMIN;
+ case ISD::VECREDUCE_UMAX:
+ return ISD::UMAX;
+ case ISD::VECREDUCE_UMIN:
+ return ISD::UMIN;
+ case ISD::VECREDUCE_FMAX:
+ return ISD::FMAXNUM;
+ case ISD::VECREDUCE_FMIN:
+ return ISD::FMINNUM;
+ }
+}
+
+bool ISD::isVPOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) \
+ case ISD::SDOPC: \
+ return true;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+}
+
+/// The operand position of the vector mask.
+Optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return None;
+#define BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, TDNAME, MASKPOS, ...) \
+ case ISD::SDOPC: \
+ return MASKPOS;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+}
+
+/// The operand position of the explicit vector length parameter.
+Optional<unsigned> ISD::getVPExplicitVectorLengthIdx(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return None;
+#define BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, TDNAME, MASKPOS, EVLPOS) \
+ case ISD::SDOPC: \
+ return EVLPOS;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+}
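// Expansion sketch: every BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, TDNAME,
// MASKPOS, EVLPOS) entry in VPIntrinsics.def expands here to
//   case ISD::SDOPC: return MASKPOS;   // or EVLPOS, respectively
// so these switches stay in sync with the .def file and newly registered VP
// nodes need no changes in this file.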
+
ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) {
switch (ExtType) {
case ISD::EXTLOAD:
@@ -536,6 +635,11 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(cast<LifetimeSDNode>(N)->getOffset());
}
break;
+ case ISD::PSEUDO_PROBE:
+ ID.AddInteger(cast<PseudoProbeSDNode>(N)->getGuid());
+ ID.AddInteger(cast<PseudoProbeSDNode>(N)->getIndex());
+ ID.AddInteger(cast<PseudoProbeSDNode>(N)->getAttributes());
+ break;
case ISD::JumpTable:
case ISD::TargetJumpTable:
ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex());
@@ -1229,7 +1333,7 @@ SDValue SelectionDAG::getConstant(uint64_t Val, const SDLoc &DL, EVT VT,
bool isT, bool isO) {
EVT EltVT = VT.getScalarType();
assert((EltVT.getSizeInBits() >= 64 ||
- (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) &&
+ (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) &&
"getConstant with a uint64_t value that doesn't fit in the type!");
return getConstant(APInt(EltVT.getSizeInBits(), Val), DL, VT, isT, isO);
}
@@ -1251,10 +1355,10 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
// inserted value (the type does not need to match the vector element type).
// Any extra bits introduced will be truncated away.
if (VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) ==
- TargetLowering::TypePromoteInteger) {
- EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
- APInt NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits());
- Elt = ConstantInt::get(*getContext(), NewVal);
+ TargetLowering::TypePromoteInteger) {
+ EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
+ APInt NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits());
+ Elt = ConstantInt::get(*getContext(), NewVal);
}
// In other cases the element type is illegal and needs to be expanded, for
// example v2i64 on MIPS32. In this case, find the nearest legal type, split
@@ -1264,7 +1368,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
// only legalize if the DAG tells us we must produce legal types.
else if (NewNodesMustHaveLegalTypes && VT.isVector() &&
TLI->getTypeAction(*getContext(), EltVT) ==
- TargetLowering::TypeExpandInteger) {
+ TargetLowering::TypeExpandInteger) {
const APInt &NewVal = Elt->getValue();
EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits();
@@ -1278,9 +1382,9 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
SmallVector<SDValue, 2> EltParts;
for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) {
- EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits)
- .zextOrTrunc(ViaEltSizeInBits), DL,
- ViaEltVT, isT, isO));
+ EltParts.push_back(getConstant(
+ NewVal.lshr(i * ViaEltSizeInBits).zextOrTrunc(ViaEltSizeInBits), DL,
+ ViaEltVT, isT, isO));
}
// EltParts is currently in little endian order. If we actually want
@@ -1297,9 +1401,10 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
- Ops.insert(Ops.end(), EltParts.begin(), EltParts.end());
+ llvm::append_range(Ops, EltParts);
- SDValue V = getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops));
+ SDValue V =
+ getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops));
return V;
}
@@ -1380,7 +1485,9 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL,
}
SDValue Result(N, 0);
- if (VT.isVector())
+ if (VT.isScalableVector())
+ Result = getSplatVector(VT, DL, Result);
+ else if (VT.isVector())
Result = getSplatBuildVector(VT, DL, Result);
NewSDValueDbgMsg(Result, "Creating fp constant: ", this);
return Result;
@@ -2023,7 +2130,14 @@ Align SelectionDAG::getReducedAlign(EVT VT, bool UseABI) {
SDValue SelectionDAG::CreateStackTemporary(TypeSize Bytes, Align Alignment) {
MachineFrameInfo &MFI = MF->getFrameInfo();
- int FrameIdx = MFI.CreateStackObject(Bytes, Alignment, false);
+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
+ int StackID = 0;
+ if (Bytes.isScalable())
+ StackID = TFI->getStackIDForScalableVectors();
+ // The stack id gives an indication of whether the object is scalable or
+ // not, so it's safe to pass in the minimum size here.
+ int FrameIdx = MFI.CreateStackObject(Bytes.getKnownMinSize(), Alignment,
+ false, nullptr, StackID);
return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout()));
}
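
A frame object for a scalable type records only its known-minimum byte count; the stack ID returned by getStackIDForScalableVectors tells later passes to scale that size by vscale. A hedged sketch of the same allocation pattern from a caller's side (allocateSlot is a hypothetical helper):

// Sketch only: allocate a stack slot sized for VT, tagged scalable if needed.
static int allocateSlot(MachineFunction &MF, EVT VT, Align Alignment) {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  TypeSize Size = VT.getStoreSize();
  int StackID = Size.isScalable() ? TFI->getStackIDForScalableVectors() : 0;
  // Only the minimum size is stored; the stack ID marks it as scalable.
  return MFI.CreateStackObject(Size.getKnownMinSize(), Alignment,
                               /*isSpillSlot=*/false, /*Alloca=*/nullptr,
                               StackID);
}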
@@ -2035,7 +2149,14 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
}
SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
- TypeSize Bytes = std::max(VT1.getStoreSize(), VT2.getStoreSize());
+ TypeSize VT1Size = VT1.getStoreSize();
+ TypeSize VT2Size = VT2.getStoreSize();
+ assert(VT1Size.isScalable() == VT2Size.isScalable() &&
+ "Don't know how to choose the maximum size when creating a stack "
+ "temporary");
+ TypeSize Bytes =
+ VT1Size.getKnownMinSize() > VT2Size.getKnownMinSize() ? VT1Size : VT2Size;
+
Type *Ty1 = VT1.getTypeForEVT(*getContext());
Type *Ty2 = VT2.getTypeForEVT(*getContext());
const DataLayout &DL = getDataLayout();
@@ -2204,6 +2325,10 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
/// SimplifyMultipleUseDemandedBits and not generate any new nodes.
SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits) {
EVT VT = V.getValueType();
+
+ if (VT.isScalableVector())
+ return SDValue();
+
APInt DemandedElts = VT.isVector()
? APInt::getAllOnesValue(VT.getVectorNumElements())
: APInt(1, 1);
@@ -2221,7 +2346,6 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits,
default:
return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DemandedElts,
*this, 0);
- break;
case ISD::Constant: {
const APInt &CVal = cast<ConstantSDNode>(V)->getAPIntValue();
APInt NewVal = CVal & DemandedBits;
@@ -2247,18 +2371,6 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits,
V.getOperand(1));
}
break;
- case ISD::AND: {
- // X & -1 -> X (ignoring bits which aren't demanded).
- // Also handle the case where masked out bits in X are known to be zero.
- if (ConstantSDNode *RHSC = isConstOrConstSplat(V.getOperand(1))) {
- const APInt &AndVal = RHSC->getAPIntValue();
- if (DemandedBits.isSubsetOf(AndVal) ||
- DemandedBits.isSubsetOf(computeKnownBits(V.getOperand(0)).Zero |
- AndVal))
- return V.getOperand(0);
- }
- break;
- }
}
return SDValue();
}
@@ -2298,17 +2410,23 @@ bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask,
/// sense to specify which elements are demanded or undefined, therefore
/// they are simply ignored.
bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
- APInt &UndefElts) {
+ APInt &UndefElts, unsigned Depth) {
EVT VT = V.getValueType();
assert(VT.isVector() && "Vector type expected");
if (!VT.isScalableVector() && !DemandedElts)
return false; // No demanded elts, better to assume we don't know anything.
+ if (Depth >= MaxRecursionDepth)
+ return false; // Limit search depth.
+
// Deal with some common cases here that work for both fixed and scalable
// vector types.
switch (V.getOpcode()) {
case ISD::SPLAT_VECTOR:
+ UndefElts = V.getOperand(0).isUndef()
+ ? APInt::getAllOnesValue(DemandedElts.getBitWidth())
+ : APInt(DemandedElts.getBitWidth(), 0);
return true;
case ISD::ADD:
case ISD::SUB:
@@ -2316,13 +2434,17 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
APInt UndefLHS, UndefRHS;
SDValue LHS = V.getOperand(0);
SDValue RHS = V.getOperand(1);
- if (isSplatValue(LHS, DemandedElts, UndefLHS) &&
- isSplatValue(RHS, DemandedElts, UndefRHS)) {
+ if (isSplatValue(LHS, DemandedElts, UndefLHS, Depth + 1) &&
+ isSplatValue(RHS, DemandedElts, UndefRHS, Depth + 1)) {
UndefElts = UndefLHS | UndefRHS;
return true;
}
break;
}
+ case ISD::TRUNCATE:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ return isSplatValue(V.getOperand(0), DemandedElts, UndefElts, Depth + 1);
}
// We don't support other cases than those above for scalable vectors at
@@ -2377,7 +2499,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
APInt UndefSrcElts;
APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
- if (isSplatValue(Src, DemandedSrcElts, UndefSrcElts)) {
+ if (isSplatValue(Src, DemandedSrcElts, UndefSrcElts, Depth + 1)) {
UndefElts = UndefSrcElts.extractBits(NumElts, Idx);
return true;
}
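
The new Depth parameter follows the same bounded-recursion convention as computeKnownBits: each self-call passes Depth + 1, and the analysis gives up past MaxRecursionDepth instead of walking arbitrarily deep expression trees. The shape of that convention, reduced to a sketch (analyzeValue is a hypothetical stand-in for the per-opcode logic):

// Sketch only: depth-limited DAG walks; 6 is SelectionDAG::MaxRecursionDepth.
static bool analyzeValue(SDValue V, unsigned Depth) {
  if (Depth >= 6)
    return false;                     // bail out rather than recurse unboundedly
  for (const SDValue &Op : V->op_values())
    if (!analyzeValue(Op, Depth + 1)) // every self-call deepens by one
      return false;
  return true;
}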
@@ -2574,15 +2696,11 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
// We know all of the bits for a constant!
- Known.One = C->getAPIntValue();
- Known.Zero = ~Known.One;
- return Known;
+ return KnownBits::makeConstant(C->getAPIntValue());
}
if (auto *C = dyn_cast<ConstantFPSDNode>(Op)) {
// We know all of the bits for a constant fp!
- Known.One = C->getValueAPF().bitcastToAPInt();
- Known.Zero = ~Known.One;
- return Known;
+ return KnownBits::makeConstant(C->getValueAPF().bitcastToAPInt());
}
if (Depth >= MaxRecursionDepth)
@@ -2617,8 +2735,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
// Known bits are the values that are shared by every demanded element.
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
// If we don't know any bits, early out.
if (Known.isUnknown())
@@ -2655,8 +2772,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
if (!!DemandedLHS) {
SDValue LHS = Op.getOperand(0);
Known2 = computeKnownBits(LHS, DemandedLHS, Depth + 1);
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
}
// If we don't know any bits, early out.
if (Known.isUnknown())
@@ -2664,8 +2780,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
if (!!DemandedRHS) {
SDValue RHS = Op.getOperand(1);
Known2 = computeKnownBits(RHS, DemandedRHS, Depth + 1);
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
}
break;
}
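
Several hunks in computeKnownBits replace the manual Known.One &= / Known.Zero &= pairs with KnownBits::commonBits, which keeps a bit only when both inputs agree on it. A sketch of what that helper computes, assuming only the public Zero/One members:

// Sketch only: the intersection performed by KnownBits::commonBits.
static KnownBits commonBitsSketch(const KnownBits &A, const KnownBits &B) {
  KnownBits R(A.getBitWidth());
  R.Zero = A.Zero & B.Zero; // known zero only where both say zero
  R.One  = A.One  & B.One;  // known one only where both say one
  return R;
}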
@@ -2681,8 +2796,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
if (!!DemandedSub) {
SDValue Sub = Op.getOperand(i);
Known2 = computeKnownBits(Sub, DemandedSub, Depth + 1);
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
}
// If we don't know any bits, early out.
if (Known.isUnknown())
@@ -2710,8 +2824,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
if (!!DemandedSrcElts) {
Known2 = computeKnownBits(Src, DemandedSrcElts, Depth + 1);
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
}
break;
}
@@ -2830,35 +2943,13 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
case ISD::MUL: {
Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
-
- // If low bits are zero in either operand, output low known-0 bits.
- // Also compute a conservative estimate for high known-0 bits.
- // More trickiness is possible, but this is sufficient for the
- // interesting case of alignment computation.
- unsigned TrailZ = Known.countMinTrailingZeros() +
- Known2.countMinTrailingZeros();
- unsigned LeadZ = std::max(Known.countMinLeadingZeros() +
- Known2.countMinLeadingZeros(),
- BitWidth) - BitWidth;
-
- Known.resetAll();
- Known.Zero.setLowBits(std::min(TrailZ, BitWidth));
- Known.Zero.setHighBits(std::min(LeadZ, BitWidth));
+ Known = KnownBits::computeForMul(Known, Known2);
break;
}
case ISD::UDIV: {
- // For the purposes of computing leading zeros we can conservatively
- // treat a udiv as a logical right shift by the power of 2 known to
- // be less than the denominator.
- Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- unsigned LeadZ = Known2.countMinLeadingZeros();
-
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
- unsigned RHSMaxLeadingZeros = Known2.countMaxLeadingZeros();
- if (RHSMaxLeadingZeros != BitWidth)
- LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSMaxLeadingZeros - 1);
-
- Known.Zero.setHighBits(LeadZ);
+ Known = KnownBits::udiv(Known, Known2);
break;
}
case ISD::SELECT:
@@ -2870,8 +2961,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth+1);
// Only known if known in both the LHS and RHS.
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
break;
case ISD::SELECT_CC:
Known = computeKnownBits(Op.getOperand(3), DemandedElts, Depth+1);
@@ -2881,8 +2971,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known2 = computeKnownBits(Op.getOperand(2), DemandedElts, Depth+1);
// Only known if known in both the LHS and RHS.
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
break;
case ISD::SMULO:
case ISD::UMULO:
@@ -2911,19 +3000,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
case ISD::SHL:
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
-
- if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) {
- unsigned Shift = ShAmt->getZExtValue();
- Known.Zero <<= Shift;
- Known.One <<= Shift;
- // Low bits are known zero.
- Known.Zero.setLowBits(Shift);
- break;
- }
-
- // No matter the shift amount, the trailing zeros will stay zero.
- Known.Zero = APInt::getLowBitsSet(BitWidth, Known.countMinTrailingZeros());
- Known.One.clearAllBits();
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::shl(Known, Known2);
// Minimum shift low bits are known zero.
if (const APInt *ShMinAmt =
@@ -2932,19 +3010,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
case ISD::SRL:
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
-
- if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) {
- unsigned Shift = ShAmt->getZExtValue();
- Known.Zero.lshrInPlace(Shift);
- Known.One.lshrInPlace(Shift);
- // High bits are known zero.
- Known.Zero.setHighBits(Shift);
- break;
- }
-
- // No matter the shift amount, the leading zeros will stay zero.
- Known.Zero = APInt::getHighBitsSet(BitWidth, Known.countMinLeadingZeros());
- Known.One.clearAllBits();
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::lshr(Known, Known2);
// Minimum shift high bits are known zero.
if (const APInt *ShMinAmt =
@@ -2952,13 +3019,10 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.Zero.setHighBits(ShMinAmt->getZExtValue());
break;
case ISD::SRA:
- if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) {
- Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- unsigned Shift = ShAmt->getZExtValue();
- // Sign extend known zero/one bit (else is unknown).
- Known.Zero.ashrInPlace(Shift);
- Known.One.ashrInPlace(Shift);
- }
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::ashr(Known, Known2);
+ // TODO: Add minimum shift high known sign bits.
break;
case ISD::FSHL:
case ISD::FSHR:
@@ -2993,38 +3057,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
break;
case ISD::SIGN_EXTEND_INREG: {
- EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
- unsigned EBits = EVT.getScalarSizeInBits();
-
- // Sign extension. Compute the demanded bits in the result that are not
- // present in the input.
- APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits);
-
- APInt InSignMask = APInt::getSignMask(EBits);
- APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, EBits);
-
- // If the sign extended bits are demanded, we know that the sign
- // bit is demanded.
- InSignMask = InSignMask.zext(BitWidth);
- if (NewBits.getBoolValue())
- InputDemandedBits |= InSignMask;
-
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- Known.One &= InputDemandedBits;
- Known.Zero &= InputDemandedBits;
-
- // If the sign bit of the input is known set or clear, then we know the
- // top bits of the result.
- if (Known.Zero.intersects(InSignMask)) { // Input sign bit known clear
- Known.Zero |= NewBits;
- Known.One &= ~NewBits;
- } else if (Known.One.intersects(InSignMask)) { // Input sign bit known set
- Known.One |= NewBits;
- Known.Zero &= ~NewBits;
- } else { // Input sign bit unknown
- Known.Zero &= ~NewBits;
- Known.One &= ~NewBits;
- }
+ EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ Known = Known.sextInReg(EVT.getScalarSizeInBits());
break;
}
case ISD::CTTZ:
@@ -3052,6 +3087,11 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.Zero.setBitsFrom(Log2_32(PossibleOnes) + 1);
break;
}
+ case ISD::PARITY: {
+ // Parity returns 0 everywhere but the LSB.
+ Known.Zero.setBitsFrom(1);
+ break;
+ }
case ISD::LOAD: {
LoadSDNode *LD = cast<LoadSDNode>(Op);
const Constant *Cst = TLI->getTargetConstantFromLoad(LD);
@@ -3095,13 +3135,10 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
} else if (BitWidth == CstTy->getPrimitiveSizeInBits()) {
if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
- const APInt &Value = CInt->getValue();
- Known.One = Value;
- Known.Zero = ~Value;
+ Known = KnownBits::makeConstant(CInt->getValue());
} else if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
- APInt Value = CFP->getValueAPF().bitcastToAPInt();
- Known.One = Value;
- Known.Zero = ~Value;
+ Known =
+ KnownBits::makeConstant(CFP->getValueAPF().bitcastToAPInt());
}
}
}
@@ -3241,53 +3278,16 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = KnownBits::computeForAddCarry(Known, Known2, Carry);
break;
}
- case ISD::SREM:
- if (ConstantSDNode *Rem = isConstOrConstSplat(Op.getOperand(1))) {
- const APInt &RA = Rem->getAPIntValue().abs();
- if (RA.isPowerOf2()) {
- APInt LowBits = RA - 1;
- Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
-
- // The low bits of the first operand are unchanged by the srem.
- Known.Zero = Known2.Zero & LowBits;
- Known.One = Known2.One & LowBits;
-
- // If the first operand is non-negative or has all low bits zero, then
- // the upper bits are all zero.
- if (Known2.isNonNegative() || LowBits.isSubsetOf(Known2.Zero))
- Known.Zero |= ~LowBits;
-
- // If the first operand is negative and not all low bits are zero, then
- // the upper bits are all one.
- if (Known2.isNegative() && LowBits.intersects(Known2.One))
- Known.One |= ~LowBits;
- assert((Known.Zero & Known.One) == 0&&"Bits known to be one AND zero?");
- }
- }
+ case ISD::SREM: {
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::srem(Known, Known2);
break;
+ }
case ISD::UREM: {
- if (ConstantSDNode *Rem = isConstOrConstSplat(Op.getOperand(1))) {
- const APInt &RA = Rem->getAPIntValue();
- if (RA.isPowerOf2()) {
- APInt LowBits = (RA - 1);
- Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
-
- // The upper bits are all zero, the lower ones are unchanged.
- Known.Zero = Known2.Zero | ~LowBits;
- Known.One = Known2.One & LowBits;
- break;
- }
- }
-
- // Since the result is less than or equal to either operand, any leading
- // zero bits in either operand must also exist in the result.
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
-
- uint32_t Leaders =
- std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros());
- Known.resetAll();
- Known.Zero.setHighBits(Leaders);
+ Known = KnownBits::urem(Known, Known2);
break;
}
case ISD::EXTRACT_ELEMENT: {
@@ -3307,6 +3307,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
SDValue InVec = Op.getOperand(0);
SDValue EltNo = Op.getOperand(1);
EVT VecVT = InVec.getValueType();
+ // computeKnownBits not yet implemented for scalable vectors.
+ if (VecVT.isScalableVector())
+ break;
const unsigned EltBitWidth = VecVT.getScalarSizeInBits();
const unsigned NumSrcElts = VecVT.getVectorNumElements();
@@ -3347,73 +3350,39 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.Zero.setAllBits();
if (DemandedVal) {
Known2 = computeKnownBits(InVal, Depth + 1);
- Known.One &= Known2.One.zextOrTrunc(BitWidth);
- Known.Zero &= Known2.Zero.zextOrTrunc(BitWidth);
+ Known = KnownBits::commonBits(Known, Known2.zextOrTrunc(BitWidth));
}
if (!!DemandedVecElts) {
Known2 = computeKnownBits(InVec, DemandedVecElts, Depth + 1);
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
}
break;
}
case ISD::BITREVERSE: {
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- Known.Zero = Known2.Zero.reverseBits();
- Known.One = Known2.One.reverseBits();
+ Known = Known2.reverseBits();
break;
}
case ISD::BSWAP: {
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- Known.Zero = Known2.Zero.byteSwap();
- Known.One = Known2.One.byteSwap();
+ Known = Known2.byteSwap();
break;
}
case ISD::ABS: {
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
-
- // If the source's MSB is zero then we know the rest of the bits already.
- if (Known2.isNonNegative()) {
- Known.Zero = Known2.Zero;
- Known.One = Known2.One;
- break;
- }
-
- // We only know that the absolute values's MSB will be zero iff there is
- // a set bit that isn't the sign bit (otherwise it could be INT_MIN).
- Known2.One.clearSignBit();
- if (Known2.One.getBoolValue()) {
- Known.Zero = APInt::getSignMask(BitWidth);
- break;
- }
+ Known = Known2.abs();
break;
}
case ISD::UMIN: {
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
-
- // UMIN - we know that the result will have the maximum of the
- // known zero leading bits of the inputs.
- unsigned LeadZero = Known.countMinLeadingZeros();
- LeadZero = std::max(LeadZero, Known2.countMinLeadingZeros());
-
- Known.Zero &= Known2.Zero;
- Known.One &= Known2.One;
- Known.Zero.setHighBits(LeadZero);
+ Known = KnownBits::umin(Known, Known2);
break;
}
case ISD::UMAX: {
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
-
- // UMAX - we know that the result will have the maximum of the
- // known one leading bits of the inputs.
- unsigned LeadOne = Known.countMinLeadingOnes();
- LeadOne = std::max(LeadOne, Known2.countMinLeadingOnes());
-
- Known.Zero &= Known2.Zero;
- Known.One &= Known2.One;
- Known.One.setHighBits(LeadOne);
+ Known = KnownBits::umax(Known, Known2);
break;
}
case ISD::SMIN:
@@ -3447,12 +3416,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
}
- // Fallback - just get the shared known bits of the operands.
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- if (Known.isUnknown()) break; // Early-out
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
- Known.Zero &= Known2.Zero;
- Known.One &= Known2.One;
+ if (IsMax)
+ Known = KnownBits::smax(Known, Known2);
+ else
+ Known = KnownBits::smin(Known, Known2);
break;
}
case ISD::FrameIndex:
@@ -4395,11 +4364,16 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
for (SDValue Op : Elts)
SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
- if (SVT.bitsGT(VT.getScalarType()))
- for (SDValue &Op : Elts)
- Op = DAG.getTargetLoweringInfo().isZExtFree(Op.getValueType(), SVT)
- ? DAG.getZExtOrTrunc(Op, DL, SVT)
- : DAG.getSExtOrTrunc(Op, DL, SVT);
+ if (SVT.bitsGT(VT.getScalarType())) {
+ for (SDValue &Op : Elts) {
+ if (Op.isUndef())
+ Op = DAG.getUNDEF(SVT);
+ else
+ Op = DAG.getTargetLoweringInfo().isZExtFree(Op.getValueType(), SVT)
+ ? DAG.getZExtOrTrunc(Op, DL, SVT)
+ : DAG.getSExtOrTrunc(Op, DL, SVT);
+ }
+ }
SDValue V = DAG.getBuildVector(VT, DL, Elts);
NewSDValueDbgMsg(V, "New node fold concat vectors: ", &DAG);
@@ -4425,6 +4399,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) {
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDValue Operand) {
+ SDNodeFlags Flags;
+ if (Inserter)
+ Flags = Inserter->getFlags();
+ return getNode(Opcode, DL, VT, Operand, Flags);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue Operand, const SDNodeFlags Flags) {
// Constant fold unary operations with an integer constant operand. Even
// opaque constant will be folded, because the folding of unary operations
@@ -4625,8 +4607,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
Operand.getValueType().isFloatingPoint() && "Invalid FP cast!");
if (Operand.getValueType() == VT) return Operand; // noop conversion.
assert((!VT.isVector() ||
- VT.getVectorNumElements() ==
- Operand.getValueType().getVectorNumElements()) &&
+ VT.getVectorElementCount() ==
+ Operand.getValueType().getVectorElementCount()) &&
"Vector element count mismatch!");
assert(Operand.getValueType().bitsLT(VT) &&
"Invalid fpext node, dst < src!");
@@ -4811,6 +4793,25 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::VSCALE:
assert(VT == Operand.getValueType() && "Unexpected VT!");
break;
+ case ISD::CTPOP:
+ if (Operand.getValueType().getScalarType() == MVT::i1)
+ return Operand;
+ break;
+ case ISD::CTLZ:
+ case ISD::CTTZ:
+ if (Operand.getValueType().getScalarType() == MVT::i1)
+ return getNOT(DL, Operand, Operand.getValueType());
+ break;
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ if (Operand.getValueType().getScalarType() == MVT::i1)
+ return getNode(ISD::VECREDUCE_OR, DL, VT, Operand);
+ break;
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_UMIN:
+ if (Operand.getValueType().getScalarType() == MVT::i1)
+ return getNode(ISD::VECREDUCE_AND, DL, VT, Operand);
+ break;
}
SDNode *N;
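
The i1 folds added above follow directly from one-bit arithmetic, where a signed i1 reads 1 as -1. Worked cases:

//   ctpop(0) = 0, ctpop(1) = 1             => ctpop(x) == x
//   ctlz(0)  = 1, ctlz(1)  = 0             => ctlz(x) == ~x (likewise cttz)
//   signed i1: 0 -> 0, 1 -> -1
//   vecreduce_smin / vecreduce_umax = 1 iff any lane is 1  => vecreduce_or
//   vecreduce_smax / vecreduce_umin = 1 iff all lanes are 1 => vecreduce_and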
@@ -5233,6 +5234,14 @@ SDValue SelectionDAG::getAssertAlign(const SDLoc &DL, SDValue Val, Align A) {
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDValue N1, SDValue N2) {
+ SDNodeFlags Flags;
+ if (Inserter)
+ Flags = Inserter->getFlags();
+ return getNode(Opcode, DL, VT, N1, N2, Flags);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue N1, SDValue N2, const SDNodeFlags Flags) {
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
@@ -5312,10 +5321,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::MULHS:
case ISD::SDIV:
case ISD::SREM:
- case ISD::SMIN:
- case ISD::SMAX:
- case ISD::UMIN:
- case ISD::UMAX:
case ISD::SADDSAT:
case ISD::SSUBSAT:
case ISD::UADDSAT:
@@ -5324,6 +5329,22 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
break;
+ case ISD::SMIN:
+ case ISD::UMAX:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
+ return getNode(ISD::OR, DL, VT, N1, N2);
+ break;
+ case ISD::SMAX:
+ case ISD::UMIN:
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
+ return getNode(ISD::AND, DL, VT, N1, N2);
+ break;
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
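
The same two's-complement reading justifies the binary i1 cases just above: SMIN and UMAX return 1 exactly when either input bit is 1 (OR), while SMAX and UMIN return 1 only when both are (AND). The full truth table, with signed i1 interpreting 1 as -1:

//  a b | smin umax (== or) | smax umin (== and)
//  0 0 |   0    0          |   0    0
//  0 1 |   1    1          |   0    0
//  1 0 |   1    1          |   0    0
//  1 1 |   1    1          |   1    1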
@@ -5365,8 +5386,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// amounts. This catches things like trying to shift an i1024 value by an
// i8, which is easy to fall into in generic code that uses
// TLI.getShiftAmount().
- assert(N2.getValueType().getScalarSizeInBits().getFixedSize() >=
- Log2_32_Ceil(VT.getScalarSizeInBits().getFixedSize()) &&
+ assert(N2.getValueType().getScalarSizeInBits() >=
+ Log2_32_Ceil(VT.getScalarSizeInBits()) &&
"Invalid use of small shift amount with oversized value!");
// Always fold shifts of i1 values so the code generator doesn't need to
@@ -5562,6 +5583,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
(VT.getVectorMinNumElements() + N2C->getZExtValue()) <=
N1VT.getVectorMinNumElements()) &&
"Extract subvector overflow!");
+ assert(N2C->getAPIntValue().getBitWidth() ==
+ TLI->getVectorIdxTy(getDataLayout())
+ .getSizeInBits()
+ .getFixedSize() &&
+ "Constant index for EXTRACT_SUBVECTOR has an invalid size");
// Trivial extraction.
if (VT == N1VT)
@@ -5573,8 +5599,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// EXTRACT_SUBVECTOR of CONCAT_VECTOR can be simplified if the pieces of
// the concat have the same type as the extract.
- if (N2C && N1.getOpcode() == ISD::CONCAT_VECTORS &&
- N1.getNumOperands() > 0 && VT == N1.getOperand(0).getValueType()) {
+ if (N1.getOpcode() == ISD::CONCAT_VECTORS && N1.getNumOperands() > 0 &&
+ VT == N1.getOperand(0).getValueType()) {
unsigned Factor = VT.getVectorMinNumElements();
return N1.getOperand(N2C->getZExtValue() / Factor);
}
@@ -5671,6 +5697,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ SDValue N1, SDValue N2, SDValue N3) {
+ SDNodeFlags Flags;
+ if (Inserter)
+ Flags = Inserter->getFlags();
+ return getNode(Opcode, DL, VT, N1, N2, N3, Flags);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue N1, SDValue N2, SDValue N3,
const SDNodeFlags Flags) {
// Perform various simplifications.
@@ -5940,11 +5974,20 @@ static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG,
return SDValue(nullptr, 0);
}
-SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, int64_t Offset,
+SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, TypeSize Offset,
const SDLoc &DL,
const SDNodeFlags Flags) {
EVT VT = Base.getValueType();
- return getMemBasePlusOffset(Base, getConstant(Offset, DL, VT), DL, Flags);
+ SDValue Index;
+
+ if (Offset.isScalable())
+ Index = getVScale(DL, Base.getValueType(),
+ APInt(Base.getValueSizeInBits().getFixedSize(),
+ Offset.getKnownMinSize()));
+ else
+ Index = getConstant(Offset.getFixedSize(), DL, VT);
+
+ return getMemBasePlusOffset(Base, Index, DL, Flags);
}
SDValue SelectionDAG::getMemBasePlusOffset(SDValue Ptr, SDValue Offset,
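
With the TypeSize overload introduced above, a scalable byte offset is materialized as an ISD::VSCALE multiple rather than a plain constant, so the runtime address becomes Base + vscale * knownMin. A hedged usage sketch (Base and DL as in any DAG context):

// Sketch only: advance a pointer by one 16-byte chunk, fixed vs. scalable.
SDValue FixedPtr = DAG.getMemBasePlusOffset(Base, TypeSize::Fixed(16), DL);
// Scalable form builds Base + (vscale * 16) via an ISD::VSCALE node.
SDValue ScalPtr  = DAG.getMemBasePlusOffset(Base, TypeSize::Scalable(16), DL);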
@@ -6039,7 +6082,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
SrcAlign = Alignment;
assert(SrcAlign && "SrcAlign must be set");
ConstantDataArraySlice Slice;
- bool CopyFromConstant = isMemSrcFromConstant(Src, Slice);
+ // If the copy is volatile, perform it even when the source is constant.
+ bool CopyFromConstant = !isVol && isMemSrcFromConstant(Src, Slice);
bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr;
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
const MemOp Op = isZeroConstant
@@ -6111,8 +6155,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice);
if (Value.getNode()) {
Store = DAG.getStore(
- Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
- DstPtrInfo.getWithOffset(DstOff), Alignment.value(), MMOFlags);
+ Chain, dl, Value,
+ DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
+ DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags);
OutChains.push_back(Store);
}
}
@@ -6132,16 +6177,17 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
if (isDereferenceable)
SrcMMOFlags |= MachineMemOperand::MODereferenceable;
- Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
- DAG.getMemBasePlusOffset(Src, SrcOff, dl),
- SrcPtrInfo.getWithOffset(SrcOff), VT,
- commonAlignment(*SrcAlign, SrcOff).value(),
- SrcMMOFlags);
+ Value = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, NVT, Chain,
+ DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl),
+ SrcPtrInfo.getWithOffset(SrcOff), VT,
+ commonAlignment(*SrcAlign, SrcOff), SrcMMOFlags);
OutLoadChains.push_back(Value.getValue(1));
Store = DAG.getTruncStore(
- Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
- DstPtrInfo.getWithOffset(DstOff), VT, Alignment.value(), MMOFlags);
+ Chain, dl, Value,
+ DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
+ DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags);
OutStoreChains.push_back(Store);
}
SrcOff += VTSize;
@@ -6261,9 +6307,10 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
if (isDereferenceable)
SrcMMOFlags |= MachineMemOperand::MODereferenceable;
- Value = DAG.getLoad(
- VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl),
- SrcPtrInfo.getWithOffset(SrcOff), SrcAlign->value(), SrcMMOFlags);
+ Value =
+ DAG.getLoad(VT, dl, Chain,
+ DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl),
+ SrcPtrInfo.getWithOffset(SrcOff), *SrcAlign, SrcMMOFlags);
LoadValues.push_back(Value);
LoadChains.push_back(Value.getValue(1));
SrcOff += VTSize;
@@ -6275,9 +6322,10 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
unsigned VTSize = VT.getSizeInBits() / 8;
SDValue Store;
- Store = DAG.getStore(
- Chain, dl, LoadValues[i], DAG.getMemBasePlusOffset(Dst, DstOff, dl),
- DstPtrInfo.getWithOffset(DstOff), Alignment.value(), MMOFlags);
+ Store =
+ DAG.getStore(Chain, dl, LoadValues[i],
+ DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
+ DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags);
OutChains.push_back(Store);
DstOff += VTSize;
}
@@ -6375,8 +6423,9 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
}
assert(Value.getValueType() == VT && "Value with wrong type.");
SDValue Store = DAG.getStore(
- Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
- DstPtrInfo.getWithOffset(DstOff), Alignment.value(),
+ Chain, dl, Value,
+ DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl),
+ DstPtrInfo.getWithOffset(DstOff), Alignment,
isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone);
OutChains.push_back(Store);
DstOff += VT.getSizeInBits() / 8;
@@ -6390,7 +6439,7 @@ static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI,
unsigned AS) {
// Lowering memcpy / memset / memmove intrinsics to calls is only valid if all
// pointer operands can be losslessly bitcasted to pointers of address space 0
- if (AS != 0 && !TLI->isNoopAddrSpaceCast(AS, 0)) {
+ if (AS != 0 && !TLI->getTargetMachine().isNoopAddrSpaceCast(AS, 0)) {
report_fatal_error("cannot lower memory intrinsic in address space " +
Twine(AS));
}
@@ -6882,6 +6931,30 @@ SDValue SelectionDAG::getLifetimeNode(bool IsStart, const SDLoc &dl,
return V;
}
+SDValue SelectionDAG::getPseudoProbeNode(const SDLoc &Dl, SDValue Chain,
+ uint64_t Guid, uint64_t Index,
+ uint32_t Attr) {
+ const unsigned Opcode = ISD::PSEUDO_PROBE;
+ const auto VTs = getVTList(MVT::Other);
+ SDValue Ops[] = {Chain};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops);
+ ID.AddInteger(Guid);
+ ID.AddInteger(Index);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, Dl, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<PseudoProbeSDNode>(
+ Opcode, Dl.getIROrder(), Dl.getDebugLoc(), VTs, Guid, Index, Attr);
+ createOperands(N, Ops);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
/// MachinePointerInfo record from it. This is particularly useful because the
/// code generator has many cases where it doesn't bother passing in a
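
getPseudoProbeNode follows the standard SelectionDAG CSE recipe: hash the opcode, value types, operands, and node-specific payload into a FoldingSetNodeID; return an existing structurally identical node on a hit; otherwise create and register a new one. The skeleton, reduced to its moving parts (Payload stands in for Guid/Index/Attr):

// Sketch only: the FoldingSet lookup shared by most get*Node builders.
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops); // opcode + value types + operands
ID.AddInteger(Payload);              // node-specific data beyond the operands
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
  return SDValue(E, 0);              // reuse the existing node
// Miss: newSDNode<...>, createOperands, CSEMap.InsertNode, InsertNode.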
@@ -6962,7 +7035,7 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
assert(VT.isVector() == MemVT.isVector() &&
"Cannot use an ext load to convert to or from a vector!");
assert((!VT.isVector() ||
- VT.getVectorNumElements() == MemVT.getVectorNumElements()) &&
+ VT.getVectorElementCount() == MemVT.getVectorElementCount()) &&
"Cannot use an ext load to change the number of vector elements!");
}
@@ -7041,8 +7114,7 @@ SDValue SelectionDAG::getIndexedLoad(SDValue OrigLoad, const SDLoc &dl,
~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
LD->getChain(), Base, Offset, LD->getPointerInfo(),
- LD->getMemoryVT(), LD->getAlignment(), MMOFlags,
- LD->getAAInfo());
+ LD->getMemoryVT(), LD->getAlign(), MMOFlags, LD->getAAInfo());
}
SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
@@ -7112,7 +7184,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
- PtrInfo, MMOFlags, SVT.getStoreSize(), Alignment, AAInfo);
+ PtrInfo, MMOFlags, MemoryLocation::getSizeOrUnknown(SVT.getStoreSize()),
+ Alignment, AAInfo);
return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO);
}
@@ -7133,7 +7206,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
assert(VT.isVector() == SVT.isVector() &&
"Cannot use trunc store to convert to or from a vector!");
assert((!VT.isVector() ||
- VT.getVectorNumElements() == SVT.getVectorNumElements()) &&
+ VT.getVectorElementCount() == SVT.getVectorElementCount()) &&
"Cannot use trunc store to change the number of vector elements!");
SDVTList VTs = getVTList(MVT::Other);
@@ -7285,14 +7358,15 @@ SDValue SelectionDAG::getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl,
SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops,
MachineMemOperand *MMO,
- ISD::MemIndexType IndexType) {
+ ISD::MemIndexType IndexType,
+ ISD::LoadExtType ExtTy) {
assert(Ops.size() == 6 && "Incompatible number of operands");
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops);
ID.AddInteger(VT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<MaskedGatherSDNode>(
- dl.getIROrder(), VTs, VT, MMO, IndexType));
+ dl.getIROrder(), VTs, VT, MMO, IndexType, ExtTy));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
@@ -7300,17 +7374,22 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
return SDValue(E, 0);
}
+ IndexType = TLI->getCanonicalIndexType(IndexType, VT, Ops[4]);
auto *N = newSDNode<MaskedGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(),
- VTs, VT, MMO, IndexType);
+ VTs, VT, MMO, IndexType, ExtTy);
createOperands(N, Ops);
assert(N->getPassThru().getValueType() == N->getValueType(0) &&
"Incompatible type of the PassThru value in MaskedGatherSDNode");
- assert(N->getMask().getValueType().getVectorNumElements() ==
- N->getValueType(0).getVectorNumElements() &&
+ assert(N->getMask().getValueType().getVectorElementCount() ==
+ N->getValueType(0).getVectorElementCount() &&
"Vector width mismatch between mask and data");
- assert(N->getIndex().getValueType().getVectorNumElements() >=
- N->getValueType(0).getVectorNumElements() &&
+ assert(N->getIndex().getValueType().getVectorElementCount().isScalable() ==
+ N->getValueType(0).getVectorElementCount().isScalable() &&
+ "Scalable flags of index and data do not match");
+ assert(ElementCount::isKnownGE(
+ N->getIndex().getValueType().getVectorElementCount(),
+ N->getValueType(0).getVectorElementCount()) &&
"Vector width mismatch between index and data");
assert(isa<ConstantSDNode>(N->getScale()) &&
cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&
@@ -7326,29 +7405,37 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops,
MachineMemOperand *MMO,
- ISD::MemIndexType IndexType) {
+ ISD::MemIndexType IndexType,
+ bool IsTrunc) {
assert(Ops.size() == 6 && "Incompatible number of operands");
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops);
ID.AddInteger(VT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<MaskedScatterSDNode>(
- dl.getIROrder(), VTs, VT, MMO, IndexType));
+ dl.getIROrder(), VTs, VT, MMO, IndexType, IsTrunc));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<MaskedScatterSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
+
+ IndexType = TLI->getCanonicalIndexType(IndexType, VT, Ops[4]);
auto *N = newSDNode<MaskedScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(),
- VTs, VT, MMO, IndexType);
+ VTs, VT, MMO, IndexType, IsTrunc);
createOperands(N, Ops);
- assert(N->getMask().getValueType().getVectorNumElements() ==
- N->getValue().getValueType().getVectorNumElements() &&
+ assert(N->getMask().getValueType().getVectorElementCount() ==
+ N->getValue().getValueType().getVectorElementCount() &&
"Vector width mismatch between mask and data");
- assert(N->getIndex().getValueType().getVectorNumElements() >=
- N->getValue().getValueType().getVectorNumElements() &&
+ assert(
+ N->getIndex().getValueType().getVectorElementCount().isScalable() ==
+ N->getValue().getValueType().getVectorElementCount().isScalable() &&
+ "Scalable flags of index and data do not match");
+ assert(ElementCount::isKnownGE(
+ N->getIndex().getValueType().getVectorElementCount(),
+ N->getValue().getValueType().getVectorElementCount()) &&
"Vector width mismatch between index and data");
assert(isa<ConstantSDNode>(N->getScale()) &&
cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&
@@ -7452,6 +7539,11 @@ SDValue SelectionDAG::simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y,
if (YC->getValueAPF().isExactlyValue(1.0))
return X;
+ // X * 0.0 --> 0.0
+ if (Opcode == ISD::FMUL && Flags.hasNoNaNs() && Flags.hasNoSignedZeros())
+ if (YC->getValueAPF().isZero())
+ return getConstantFP(0.0, SDLoc(Y), Y.getValueType());
+
return SDValue();
}
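
The new FMUL fold is deliberately guarded by both fast-math flags, since folding X * 0.0 to +0.0 is unsound in general:

//   NaN * 0.0 = NaN    (not +0.0)  -> requires hasNoNaNs()
//  -1.0 * 0.0 = -0.0   (not +0.0)  -> requires hasNoSignedZeros()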
@@ -7478,6 +7570,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
+ ArrayRef<SDValue> Ops) {
+ SDNodeFlags Flags;
+ if (Inserter)
+ Flags = Inserter->getFlags();
+ return getNode(Opcode, DL, VT, Ops, Flags);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
ArrayRef<SDValue> Ops, const SDNodeFlags Flags) {
unsigned NumOps = Ops.size();
switch (NumOps) {
@@ -7549,6 +7649,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL,
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
+ ArrayRef<SDValue> Ops) {
+ SDNodeFlags Flags;
+ if (Inserter)
+ Flags = Inserter->getFlags();
+ return getNode(Opcode, DL, VTList, Ops, Flags);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
ArrayRef<SDValue> Ops, const SDNodeFlags Flags) {
if (VTList.NumVTs == 1)
return getNode(Opcode, DL, VTList.VTs[0], Ops);
@@ -8245,6 +8353,14 @@ SDValue SelectionDAG::getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT,
/// getNodeIfExists - Get the specified node if it's already available, or
/// else return NULL.
SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
+ ArrayRef<SDValue> Ops) {
+ SDNodeFlags Flags;
+ if (Inserter)
+ Flags = Inserter->getFlags();
+ return getNodeIfExists(Opcode, VTList, Ops, Flags);
+}
+
+SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
ArrayRef<SDValue> Ops,
const SDNodeFlags Flags) {
if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) {
@@ -8259,6 +8375,19 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
return nullptr;
}
+/// doesNodeExist - Check if a node exists without modifying its flags.
+bool SelectionDAG::doesNodeExist(unsigned Opcode, SDVTList VTList,
+ ArrayRef<SDValue> Ops) {
+ if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTList, Ops);
+ void *IP = nullptr;
+ if (FindNodeOrInsertPos(ID, SDLoc(), IP))
+ return true;
+ }
+ return false;
+}
+
/// getDbgValue - Creates a SDDbgValue node.
///
/// SDNode
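
doesNodeExist above mirrors getNodeIfExists but intersects no flags, making it safe for purely speculative queries. A hedged usage sketch (VT, X, and Y are illustrative):

// Sketch only: check whether an equivalent ADD is already in the DAG before
// deciding to canonicalize toward it; the query has no side effects.
bool HaveAdd = DAG.doesNodeExist(ISD::ADD, DAG.getVTList(VT), {X, Y});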
@@ -8676,21 +8805,31 @@ namespace {
} // end anonymous namespace
-void SelectionDAG::updateDivergence(SDNode * N)
-{
- if (TLI->isSDNodeAlwaysUniform(N))
- return;
- bool IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA);
+bool SelectionDAG::calculateDivergence(SDNode *N) {
+ if (TLI->isSDNodeAlwaysUniform(N)) {
+ assert(!TLI->isSDNodeSourceOfDivergence(N, FLI, DA) &&
+ "Conflicting divergence information!");
+ return false;
+ }
+ if (TLI->isSDNodeSourceOfDivergence(N, FLI, DA))
+ return true;
for (auto &Op : N->ops()) {
- if (Op.Val.getValueType() != MVT::Other)
- IsDivergent |= Op.getNode()->isDivergent();
+ if (Op.Val.getValueType() != MVT::Other && Op.getNode()->isDivergent())
+ return true;
}
- if (N->SDNodeBits.IsDivergent != IsDivergent) {
- N->SDNodeBits.IsDivergent = IsDivergent;
- for (auto U : N->uses()) {
- updateDivergence(U);
+ return false;
+}
+
+void SelectionDAG::updateDivergence(SDNode *N) {
+ SmallVector<SDNode *, 16> Worklist(1, N);
+ do {
+ N = Worklist.pop_back_val();
+ bool IsDivergent = calculateDivergence(N);
+ if (N->SDNodeBits.IsDivergent != IsDivergent) {
+ N->SDNodeBits.IsDivergent = IsDivergent;
+ llvm::append_range(Worklist, N->uses());
}
- }
+ } while (!Worklist.empty());
}
void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) {
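
The updateDivergence rewrite trades recursion through users for an explicit worklist: a node's users are re-queued only when its divergence bit actually changes, so deep use chains cannot overflow the stack and unchanged nodes are never revisited. The generic shape of that pattern, as a sketch:

// Sketch only: iterative propagate-on-change over a use graph.
template <typename NodeT, typename RecomputeFn, typename UsersFn>
void propagate(NodeT *Root, RecomputeFn Recompute, UsersFn UsersOf) {
  SmallVector<NodeT *, 16> Worklist(1, Root);
  while (!Worklist.empty()) {
    NodeT *N = Worklist.pop_back_val();
    bool NewBit = Recompute(N);
    if (N->Bit == NewBit)
      continue;                 // nothing changed; users need no update
    N->Bit = NewBit;            // assumes a mutable Bit field (sketch)
    for (NodeT *U : UsersOf(N))
      Worklist.push_back(U);    // fan out only from changed nodes
  }
}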
@@ -8716,26 +8855,9 @@ void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) {
void SelectionDAG::VerifyDAGDiverence() {
std::vector<SDNode *> TopoOrder;
CreateTopologicalOrder(TopoOrder);
- const TargetLowering &TLI = getTargetLoweringInfo();
- DenseMap<const SDNode *, bool> DivergenceMap;
- for (auto &N : allnodes()) {
- DivergenceMap[&N] = false;
- }
- for (auto N : TopoOrder) {
- bool IsDivergent = DivergenceMap[N];
- bool IsSDNodeDivergent = TLI.isSDNodeSourceOfDivergence(N, FLI, DA);
- for (auto &Op : N->ops()) {
- if (Op.Val.getValueType() != MVT::Other)
- IsSDNodeDivergent |= DivergenceMap[Op.getNode()];
- }
- if (!IsDivergent && IsSDNodeDivergent && !TLI.isSDNodeAlwaysUniform(N)) {
- DivergenceMap[N] = true;
- }
- }
- for (auto &N : allnodes()) {
- (void)N;
- assert(DivergenceMap[&N] == N.isDivergent() &&
- "Divergence bit inconsistency detected\n");
+ for (auto *N : TopoOrder) {
+ assert(calculateDivergence(N) == N->isDivergent() &&
+ "Divergence bit inconsistency detected");
}
}
#endif
@@ -8904,25 +9026,32 @@ void SelectionDAG::AddDbgLabel(SDDbgLabel *DB) {
DbgInfo->add(DB);
}
-SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
- SDValue NewMemOp) {
- assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node");
+SDValue SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain,
+ SDValue NewMemOpChain) {
+ assert(isa<MemSDNode>(NewMemOpChain) && "Expected a memop node");
+ assert(NewMemOpChain.getValueType() == MVT::Other && "Expected a token VT");
// The new memory operation must have the same position as the old load in
// terms of memory dependency. Create a TokenFactor for the old load and new
// memory operation and update uses of the old load's output chain to use that
// TokenFactor.
- SDValue OldChain = SDValue(OldLoad, 1);
- SDValue NewChain = SDValue(NewMemOp.getNode(), 1);
- if (OldChain == NewChain || !OldLoad->hasAnyUseOfValue(1))
- return NewChain;
+ if (OldChain == NewMemOpChain || OldChain.use_empty())
+ return NewMemOpChain;
- SDValue TokenFactor =
- getNode(ISD::TokenFactor, SDLoc(OldLoad), MVT::Other, OldChain, NewChain);
+ SDValue TokenFactor = getNode(ISD::TokenFactor, SDLoc(OldChain), MVT::Other,
+ OldChain, NewMemOpChain);
ReplaceAllUsesOfValueWith(OldChain, TokenFactor);
- UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewChain);
+ UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewMemOpChain);
return TokenFactor;
}
+SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
+ SDValue NewMemOp) {
+ assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node");
+ SDValue OldChain = SDValue(OldLoad, 1);
+ SDValue NewMemOpChain = NewMemOp.getValue(1);
+ return makeEquivalentMemoryOrdering(OldChain, NewMemOpChain);
+}
+
SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op,
Function **OutFunction) {
assert(isa<ExternalSymbolSDNode>(Op) && "Node should be an ExternalSymbol");
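
The chain-based makeEquivalentMemoryOrdering overload generalizes the load-only version: every consumer of OldChain is redirected to a TokenFactor ordered after both the old chain and the new memory operation.

//   before:  users --> OldChain
//   after:   users --> TokenFactor(OldChain, NewMemOpChain)
// ReplaceAllUsesOfValueWith also rewrites the TokenFactor's own operand,
// which would create a cycle, so UpdateNodeOperands restores its two inputs.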
@@ -9006,6 +9135,18 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs,
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
return CN;
+ // SplatVectors can truncate their operands. Ignore that case here unless
+ // AllowTruncation is set.
+ if (N->getOpcode() == ISD::SPLAT_VECTOR) {
+ EVT VecEltVT = N->getValueType(0).getVectorElementType();
+ if (auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ EVT CVT = CN->getValueType(0);
+ assert(CVT.bitsGE(VecEltVT) && "Illegal splat_vector element extension");
+ if (AllowTruncation || CVT == VecEltVT)
+ return CN;
+ }
+ }
+
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
BitVector UndefElements;
ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
@@ -9059,6 +9200,10 @@ ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) {
return CN;
}
+ if (N.getOpcode() == ISD::SPLAT_VECTOR)
+ if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N.getOperand(0)))
+ return CN;
+
return nullptr;
}
@@ -9220,8 +9365,7 @@ bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) {
bool Seen = false;
for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
SDNode *User = *I;
- if (llvm::any_of(Nodes,
- [&User](const SDNode *Node) { return User == Node; }))
+ if (llvm::is_contained(Nodes, User))
Seen = true;
else
return false;
@@ -9232,7 +9376,7 @@ bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) {
/// isOperand - Return true if this node is an operand of N.
bool SDValue::isOperandOf(const SDNode *N) const {
- return any_of(N->op_values(), [this](SDValue Op) { return *this == Op; });
+ return is_contained(N->op_values(), *this);
}
bool SDNode::isOperandOf(const SDNode *N) const {
@@ -9616,24 +9760,24 @@ std::pair<EVT, EVT>
SelectionDAG::GetDependentSplitDestVTs(const EVT &VT, const EVT &EnvVT,
bool *HiIsEmpty) const {
EVT EltTp = VT.getVectorElementType();
- bool IsScalable = VT.isScalableVector();
// Examples:
// custom VL=8 with enveloping VL=8/8 yields 8/0 (hi empty)
// custom VL=9 with enveloping VL=8/8 yields 8/1
// custom VL=10 with enveloping VL=8/8 yields 8/2
// etc.
- unsigned VTNumElts = VT.getVectorNumElements();
- unsigned EnvNumElts = EnvVT.getVectorNumElements();
+ ElementCount VTNumElts = VT.getVectorElementCount();
+ ElementCount EnvNumElts = EnvVT.getVectorElementCount();
+ assert(VTNumElts.isScalable() == EnvNumElts.isScalable() &&
+ "Mixing fixed width and scalable vectors when enveloping a type");
EVT LoVT, HiVT;
- if (VTNumElts > EnvNumElts) {
+ if (VTNumElts.getKnownMinValue() > EnvNumElts.getKnownMinValue()) {
LoVT = EnvVT;
- HiVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts - EnvNumElts,
- IsScalable);
+ HiVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts - EnvNumElts);
*HiIsEmpty = false;
} else {
// Flag that hi type has zero storage size, but return split envelope type
// (this would be easier if vector types with zero elements were allowed).
- LoVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts, IsScalable);
+ LoVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts);
HiVT = EnvVT;
*HiIsEmpty = true;
}
@@ -9768,16 +9912,16 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
SDValue BuildVectorSDNode::getSplatValue(const APInt &DemandedElts,
BitVector *UndefElements) const {
+ unsigned NumOps = getNumOperands();
if (UndefElements) {
UndefElements->clear();
- UndefElements->resize(getNumOperands());
+ UndefElements->resize(NumOps);
}
- assert(getNumOperands() == DemandedElts.getBitWidth() &&
- "Unexpected vector size");
+ assert(NumOps == DemandedElts.getBitWidth() && "Unexpected vector size");
if (!DemandedElts)
return SDValue();
SDValue Splatted;
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ for (unsigned i = 0; i != NumOps; ++i) {
if (!DemandedElts[i])
continue;
SDValue Op = getOperand(i);
@@ -9806,6 +9950,58 @@ SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {
return getSplatValue(DemandedElts, UndefElements);
}
+bool BuildVectorSDNode::getRepeatedSequence(const APInt &DemandedElts,
+ SmallVectorImpl<SDValue> &Sequence,
+ BitVector *UndefElements) const {
+ unsigned NumOps = getNumOperands();
+ Sequence.clear();
+ if (UndefElements) {
+ UndefElements->clear();
+ UndefElements->resize(NumOps);
+ }
+ assert(NumOps == DemandedElts.getBitWidth() && "Unexpected vector size");
+ if (!DemandedElts || NumOps < 2 || !isPowerOf2_32(NumOps))
+ return false;
+
+ // Set the undefs even if we don't find a sequence (like getSplatValue).
+ if (UndefElements)
+ for (unsigned I = 0; I != NumOps; ++I)
+ if (DemandedElts[I] && getOperand(I).isUndef())
+ (*UndefElements)[I] = true;
+
+ // Iteratively widen the sequence length looking for repetitions.
+ for (unsigned SeqLen = 1; SeqLen < NumOps; SeqLen *= 2) {
+ Sequence.append(SeqLen, SDValue());
+ for (unsigned I = 0; I != NumOps; ++I) {
+ if (!DemandedElts[I])
+ continue;
+ SDValue &SeqOp = Sequence[I % SeqLen];
+ SDValue Op = getOperand(I);
+ if (Op.isUndef()) {
+ if (!SeqOp)
+ SeqOp = Op;
+ continue;
+ }
+ if (SeqOp && !SeqOp.isUndef() && SeqOp != Op) {
+ Sequence.clear();
+ break;
+ }
+ SeqOp = Op;
+ }
+ if (!Sequence.empty())
+ return true;
+ }
+
+ assert(Sequence.empty() && "Failed to empty non-repeating sequence pattern");
+ return false;
+}
+
+bool BuildVectorSDNode::getRepeatedSequence(SmallVectorImpl<SDValue> &Sequence,
+ BitVector *UndefElements) const {
+ APInt DemandedElts = APInt::getAllOnesValue(getNumOperands());
+ return getRepeatedSequence(DemandedElts, Sequence, UndefElements);
+}
+
ConstantSDNode *
BuildVectorSDNode::getConstantSplatNode(const APInt &DemandedElts,
BitVector *UndefElements) const {
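
getRepeatedSequence searches power-of-two sequence lengths from 1 upward, so a splat is recognized before a pair, a pair before a quad, and so on. A worked example over an eight-operand BUILD_VECTOR:

// Operands: <a, b, a, b, a, b, a, b>  (all lanes demanded)
//   SeqLen 1: lane 1 (b) conflicts with Sequence[0] (a) -> cleared, widen
//   SeqLen 2: lanes 0,2,4,6 -> a; lanes 1,3,5,7 -> b    -> returns {a, b}
// Undef lanes act as wildcards: they match any SeqOp and may be overwritten
// by a later concrete operand at the same position.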
@@ -9878,7 +10074,7 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
// Returns the SDNode if it is a constant integer BuildVector
// or constant integer.
-SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) {
+SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) const {
if (isa<ConstantSDNode>(N))
return N.getNode();
if (ISD::isBuildVectorOfConstantSDNodes(N.getNode()))
@@ -9889,10 +10085,15 @@ SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) {
if (GA->getOpcode() == ISD::GlobalAddress &&
TLI->isOffsetFoldingLegal(GA))
return GA;
+ if ((N.getOpcode() == ISD::SPLAT_VECTOR) &&
+ isa<ConstantSDNode>(N.getOperand(0)))
+ return N.getNode();
return nullptr;
}
-SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) {
+// Returns the SDNode if it is a constant float BuildVector
+// or constant float.
+SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) const {
if (isa<ConstantFPSDNode>(N))
return N.getNode();
@@ -9914,13 +10115,14 @@ void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) {
Ops[I].setUser(Node);
Ops[I].setInitial(Vals[I]);
if (Ops[I].Val.getValueType() != MVT::Other) // Skip Chain. It does not carry divergence.
- IsDivergent = IsDivergent || Ops[I].getNode()->isDivergent();
+ IsDivergent |= Ops[I].getNode()->isDivergent();
}
Node->NumOperands = Vals.size();
Node->OperandList = Ops;
- IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, DA);
- if (!TLI->isSDNodeAlwaysUniform(Node))
+ if (!TLI->isSDNodeAlwaysUniform(Node)) {
+ IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, DA);
Node->SDNodeBits.IsDivergent = IsDivergent;
+ }
checkForCycles(Node);
}
@@ -9937,6 +10139,44 @@ SDValue SelectionDAG::getTokenFactor(const SDLoc &DL,
return getNode(ISD::TokenFactor, DL, MVT::Other, Vals);
}
+SDValue SelectionDAG::getNeutralElement(unsigned Opcode, const SDLoc &DL,
+ EVT VT, SDNodeFlags Flags) {
+ switch (Opcode) {
+ default:
+ return SDValue();
+ case ISD::ADD:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::UMAX:
+ return getConstant(0, DL, VT);
+ case ISD::MUL:
+ return getConstant(1, DL, VT);
+ case ISD::AND:
+ case ISD::UMIN:
+ return getAllOnesConstant(DL, VT);
+ case ISD::SMAX:
+ return getConstant(APInt::getSignedMinValue(VT.getSizeInBits()), DL, VT);
+ case ISD::SMIN:
+ return getConstant(APInt::getSignedMaxValue(VT.getSizeInBits()), DL, VT);
+ case ISD::FADD:
+ return getConstantFP(-0.0, DL, VT);
+ case ISD::FMUL:
+ return getConstantFP(1.0, DL, VT);
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM: {
+ // Neutral element for fminnum is NaN, Inf or FLT_MAX, depending on FMF.
+ const fltSemantics &Semantics = EVTToAPFloatSemantics(VT);
+ APFloat NeutralAF = !Flags.hasNoNaNs() ? APFloat::getQNaN(Semantics) :
+ !Flags.hasNoInfs() ? APFloat::getInf(Semantics) :
+ APFloat::getLargest(Semantics);
+ if (Opcode == ISD::FMAXNUM)
+ NeutralAF.changeSign();
+
+ return getConstantFP(NeutralAF, DL, VT);
+ }
+ }
+}
+
#ifndef NDEBUG
static void checkForCyclesHelper(const SDNode *N,
SmallPtrSetImpl<const SDNode*> &Visited,
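
A neutral (identity) element leaves its operation's result unchanged, which is what makes getNeutralElement useful for widening: extra lanes filled with the identity cannot perturb a later VECREDUCE. A hedged usage sketch (WideVT, WideVec, EltVT, Lane, and Flags are illustrative):

// Sketch only: pad a widened lane with UMIN's identity (all-ones), since
// min(x, ~0) == x for any unsigned x.
SDValue Neutral = DAG.getNeutralElement(ISD::UMIN, DL, EltVT, Flags);
SDValue Padded  = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, WideVec,
                              Neutral, DAG.getVectorIdxConstant(Lane, DL));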
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index 3a53ab9717a4..20c7d771bfb6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -96,18 +97,28 @@ bool BaseIndexOffset::computeAliasing(const SDNode *Op0,
int64_t PtrDiff;
if (NumBytes0.hasValue() && NumBytes1.hasValue() &&
BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) {
+ // If the size of the memory access is unknown, do not use it for analysis.
+ // One example of an unknown-size access is a load/store of a scalable
+ // vector object on the stack.
// BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the
// following situations arise:
- IsAlias = !(
- // [----BasePtr0----]
- // [---BasePtr1--]
- // ========PtrDiff========>
- (*NumBytes0 <= PtrDiff) ||
- // [----BasePtr0----]
- // [---BasePtr1--]
- // =====(-PtrDiff)====>
- (PtrDiff + *NumBytes1 <= 0)); // i.e. *NumBytes1 < -PtrDiff.
- return true;
+ if (PtrDiff >= 0 &&
+ *NumBytes0 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
+ // [----BasePtr0----]
+ // [---BasePtr1--]
+ // ========PtrDiff========>
+ IsAlias = !(*NumBytes0 <= PtrDiff);
+ return true;
+ }
+ if (PtrDiff < 0 &&
+ *NumBytes1 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
+ // [----BasePtr0----]
+ // [---BasePtr1--]
+ // =====(-PtrDiff)====>
+ IsAlias = !((PtrDiff + *NumBytes1) <= 0);
+ return true;
+ }
+ return false;
}
// If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
// able to calculate their relative offset if at least one arises
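
A worked instance of the disjointness test above:

//  *NumBytes0 = 8, PtrDiff = 8:  [0,8) vs [8,...)  -> 8 <= 8 -> no alias
//  *NumBytes0 = 8, PtrDiff = 4:  [0,8) vs [4,...)  -> overlap -> alias
//  *NumBytes0 = MemoryLocation::UnknownSize (e.g. a scalable-vector stack
//  access): return false, i.e. undecided, letting the frame-index and
//  mutability heuristics below take over instead of wrongly proving no-alias.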
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index d2930391f87a..6638ff6a6358 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -14,15 +14,12 @@
#include "SDNodeDbgValue.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
@@ -40,7 +37,6 @@
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
-#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -53,17 +49,14 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/SwiftErrorValueTracking.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
@@ -75,13 +68,11 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
@@ -99,31 +90,22 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AtomicOrdering.h"
-#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Utils/Local.h"
-#include <algorithm>
-#include <cassert>
#include <cstddef>
-#include <cstdint>
#include <cstring>
#include <iterator>
#include <limits>
#include <numeric>
#include <tuple>
-#include <utility>
-#include <vector>
using namespace llvm;
using namespace PatternMatch;
@@ -422,10 +404,10 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
// vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
// elements we want.
if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) {
- assert((PartEVT.getVectorElementCount().Min >
- ValueVT.getVectorElementCount().Min) &&
- (PartEVT.getVectorElementCount().Scalable ==
- ValueVT.getVectorElementCount().Scalable) &&
+ assert((PartEVT.getVectorElementCount().getKnownMinValue() >
+ ValueVT.getVectorElementCount().getKnownMinValue()) &&
+ (PartEVT.getVectorElementCount().isScalable() ==
+ ValueVT.getVectorElementCount().isScalable()) &&
"Cannot narrow, it would be a lossy transformation");
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
DAG.getVectorIdxConstant(0, DL));
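These hunks replace direct ElementCount field access (.Min, .Scalable) with accessors. A simplified sketch of the shape those accessors imply (not the real llvm::ElementCount):

// A vector either has exactly MinVal elements (fixed) or MinVal * vscale
// elements for a runtime vscale (scalable).
struct ElementCountSketch {
  unsigned MinVal;
  bool Scalable;
  static ElementCountSketch getFixed(unsigned N) { return {N, false}; }
  static ElementCountSketch getScalable(unsigned N) { return {N, true}; }
  unsigned getKnownMinValue() const { return MinVal; }
  bool isScalable() const { return Scalable; }
  bool isScalar() const { return MinVal == 1 && !Scalable; }
};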
@@ -453,7 +435,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
// are the same size, this is an obvious bitcast.
if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) {
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
- } else if (ValueVT.getSizeInBits() < PartEVT.getSizeInBits()) {
+ } else if (ValueVT.bitsLT(PartEVT)) {
// Bitcast Val back the original type and extract the corresponding
// vector we want.
unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits();
@@ -683,14 +665,14 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
// Promoted vector extract
Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
} else {
- if (ValueVT.getVectorNumElements() == 1) {
+ if (ValueVT.getVectorElementCount().isScalar()) {
Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
DAG.getVectorIdxConstant(0, DL));
} else {
- assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() &&
+ uint64_t ValueSize = ValueVT.getFixedSizeInBits();
+ assert(PartVT.getFixedSizeInBits() > ValueSize &&
"lossy conversion of vector to scalar type");
- EVT IntermediateType =
- EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
+ EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize);
Val = DAG.getBitcast(IntermediateType, Val);
Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
}
@@ -723,15 +705,15 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
assert(IntermediateVT.isScalableVector() == ValueVT.isScalableVector() &&
"Mixing scalable and fixed vectors when copying in parts");
- ElementCount DestEltCnt;
+ Optional<ElementCount> DestEltCnt;
if (IntermediateVT.isVector())
DestEltCnt = IntermediateVT.getVectorElementCount() * NumIntermediates;
else
- DestEltCnt = ElementCount(NumIntermediates, false);
+ DestEltCnt = ElementCount::getFixed(NumIntermediates);
EVT BuiltVectorTy = EVT::getVectorVT(
- *DAG.getContext(), IntermediateVT.getScalarType(), DestEltCnt);
+ *DAG.getContext(), IntermediateVT.getScalarType(), DestEltCnt.getValue());
if (ValueVT != BuiltVectorTy) {
if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy))
Val = Widened;
@@ -975,7 +957,7 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
// shouldn't try to apply any sort of splitting logic to them.
assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() &&
"No 1:1 mapping from clobbers to regs?");
- unsigned SP = TLI.getStackPointerRegisterToSaveRestore();
+ Register SP = TLI.getStackPointerRegisterToSaveRestore();
(void)SP;
for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) {
Ops.push_back(DAG.getRegister(Regs[I], RegVTs[I]));
@@ -998,14 +980,14 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
}
}
-SmallVector<std::pair<unsigned, unsigned>, 4>
+SmallVector<std::pair<unsigned, TypeSize>, 4>
RegsForValue::getRegsAndSizes() const {
- SmallVector<std::pair<unsigned, unsigned>, 4> OutVec;
+ SmallVector<std::pair<unsigned, TypeSize>, 4> OutVec;
unsigned I = 0;
for (auto CountAndVT : zip_first(RegCount, RegVTs)) {
unsigned RegCount = std::get<0>(CountAndVT);
MVT RegisterVT = std::get<1>(CountAndVT);
- unsigned RegisterSize = RegisterVT.getSizeInBits();
+ TypeSize RegisterSize = RegisterVT.getSizeInBits();
for (unsigned E = I + RegCount; I != E; ++I)
OutVec.push_back(std::make_pair(Regs[I], RegisterSize));
}
@@ -1114,25 +1096,6 @@ void SelectionDAGBuilder::visit(const Instruction &I) {
visit(I.getOpcode(), I);
- if (auto *FPMO = dyn_cast<FPMathOperator>(&I)) {
- // ConstrainedFPIntrinsics handle their own FMF.
- if (!isa<ConstrainedFPIntrinsic>(&I)) {
- // Propagate the fast-math-flags of this IR instruction to the DAG node that
- // maps to this instruction.
- // TODO: We could handle all flags (nsw, etc) here.
- // TODO: If an IR instruction maps to >1 node, only the final node will have
- // flags set.
- if (SDNode *Node = getNodeForIRValue(&I)) {
- SDNodeFlags IncomingFlags;
- IncomingFlags.copyFMF(*FPMO);
- if (!Node->getFlags().isDefined())
- Node->setFlags(IncomingFlags);
- else
- Node->intersectFlagsWith(IncomingFlags);
- }
- }
- }
-
if (!I.isTerminator() && !HasTailCall &&
!isa<GCStatepointInst>(I)) // statepoints handle their exports internally
CopyToExportRegsIfNeeded(&I);
@@ -1178,7 +1141,7 @@ void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable,
if (isMatchingDbgValue(DDI))
salvageUnresolvedDbgValue(DDI);
- DDIV.erase(remove_if(DDIV, isMatchingDbgValue), DDIV.end());
+ erase_if(DDIV, isMatchingDbgValue);
}
}
@@ -1551,6 +1514,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
return DAG.getBlockAddress(BA, VT);
+ if (const auto *Equiv = dyn_cast<DSOLocalEquivalent>(C))
+ return getValue(Equiv->getGlobalValue());
+
VectorType *VecTy = cast<VectorType>(V->getType());
// Now that we know the number and type of the elements, get that number of
@@ -1671,10 +1637,32 @@ void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
}
}
-// For wasm, there's always a single catch pad attached to a catchswitch, and
-// the control flow always stops at the single catch pad, as it does for a
-// cleanup pad. In case the exception caught is not of the types the catch pad
-// catches, it will be rethrown by a rethrow.
+// In wasm EH, even though a catchpad may not catch an exception if a tag does
+// not match, it is OK to add only the first unwind destination catchpad to the
+// successors, because there will be at least one invoke instruction within the
+// catch scope that points to the next unwind destination, if one exists, so
+// CFGSort cannot mess up the BB sorting order.
+// (All catchpads with 'catch (type)' clauses have a 'llvm.rethrow' intrinsic
+// call within them, and catchpads only consisting of 'catch (...)' have a
+// '__cxa_end_catch' call within them, both of which generate invokes in case
+// the next unwind destination exists, i.e., the next unwind destination is not
+// the caller.)
+//
+// Having at most one EH pad successor is also simpler and helps later
+// transformations.
+//
+// For example,
+// current:
+// invoke void @foo to ... unwind label %catch.dispatch
+// catch.dispatch:
+// %0 = catchswitch within ... [label %catch.start] unwind label %next
+// catch.start:
+// ...
+// ... in this BB, or in some other BB dominated by this BB, there will be an
+// invoke that points to the 'next' BB as its unwind destination
+//
+// next: ; We don't need to add this to 'current' BB's successors
+// ...
static void findWasmUnwindDestinations(
FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
BranchProbability Prob,
@@ -1837,7 +1825,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
for (unsigned i = 0; i != NumValues; ++i) {
// An aggregate return value cannot wrap around the address space, so
// offsets to its parts don't wrap either.
- SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]);
+ SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr,
+ TypeSize::Fixed(Offsets[i]));
SDValue Val = RetOp.getValue(RetOp.getResNo() + i);
if (MemVTs[i] != ValueVTs[i])
@@ -2118,14 +2107,19 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
}
const Instruction *BOp = dyn_cast<Instruction>(Cond);
+ const Value *BOpOp0, *BOpOp1;
// Compute the effective opcode for Cond, taking into account whether it needs
// to be inverted, e.g.
// and (not (or A, B)), C
// gets lowered as
// and (and (not A, not B), C)
- unsigned BOpc = 0;
+ Instruction::BinaryOps BOpc = (Instruction::BinaryOps)0;
if (BOp) {
- BOpc = BOp->getOpcode();
+ BOpc = match(BOp, m_LogicalAnd(m_Value(BOpOp0), m_Value(BOpOp1)))
+ ? Instruction::And
+ : (match(BOp, m_LogicalOr(m_Value(BOpOp0), m_Value(BOpOp1)))
+ ? Instruction::Or
+ : (Instruction::BinaryOps)0);
if (InvertCond) {
if (BOpc == Instruction::And)
BOpc = Instruction::Or;
@@ -2135,11 +2129,11 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
}
// If this node is not part of the or/and tree, emit it as a branch.
- if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
- BOpc != unsigned(Opc) || !BOp->hasOneUse() ||
- BOp->getParent() != CurBB->getBasicBlock() ||
- !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
- !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
+ // Note that all nodes in the tree should have the same opcode.
+ bool BOpIsInOrAndTree = BOpc && BOpc == Opc && BOp->hasOneUse();
+ if (!BOpIsInOrAndTree || BOp->getParent() != CurBB->getBasicBlock() ||
+ !InBlock(BOpOp0, CurBB->getBasicBlock()) ||
+ !InBlock(BOpOp1, CurBB->getBasicBlock())) {
EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
TProb, FProb, InvertCond);
return;
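Matching m_LogicalAnd/m_LogicalOr instead of checking getOpcode() lets the merge also accept the select form of and/or. A small sketch of the equivalence the matcher relies on (illustrative):

static bool logicalAnd(bool A, bool B) { return A ? B : false; } // select i1 A, i1 B, false
static bool logicalOr(bool A, bool B) { return A ? true : B; }   // select i1 A, true, i1 B

// As i1 values these agree with (A & B) and (A | B), which is why
// FindMergedConditions can treat both forms as Instruction::And / Or.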
@@ -2175,15 +2169,15 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
auto NewTrueProb = TProb / 2;
auto NewFalseProb = TProb / 2 + FProb;
// Emit the LHS condition.
- FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc,
- NewTrueProb, NewFalseProb, InvertCond);
+ FindMergedConditions(BOpOp0, TBB, TmpBB, CurBB, SwitchBB, Opc, NewTrueProb,
+ NewFalseProb, InvertCond);
// Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
// Emit the RHS condition into TmpBB.
- FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
- Probs[0], Probs[1], InvertCond);
+ FindMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
+ Probs[1], InvertCond);
} else {
assert(Opc == Instruction::And && "Unknown merge op!");
// Codegen X & Y as:
@@ -2208,15 +2202,15 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
auto NewTrueProb = TProb + FProb / 2;
auto NewFalseProb = FProb / 2;
// Emit the LHS condition.
- FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc,
- NewTrueProb, NewFalseProb, InvertCond);
+ FindMergedConditions(BOpOp0, TmpBB, FBB, CurBB, SwitchBB, Opc, NewTrueProb,
+ NewFalseProb, InvertCond);
// Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
// Emit the RHS condition into TmpBB.
- FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
- Probs[0], Probs[1], InvertCond);
+ FindMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
+ Probs[1], InvertCond);
}
}
@@ -2293,16 +2287,20 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// je foo
// cmp D, E
// jle foo
- if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
- Instruction::BinaryOps Opcode = BOp->getOpcode();
- Value *Vec, *BOp0 = BOp->getOperand(0), *BOp1 = BOp->getOperand(1);
- if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() &&
- !I.hasMetadata(LLVMContext::MD_unpredictable) &&
- (Opcode == Instruction::And || Opcode == Instruction::Or) &&
- !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
- match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
- FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
- Opcode,
+ const Instruction *BOp = dyn_cast<Instruction>(CondVal);
+ if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp &&
+ BOp->hasOneUse() && !I.hasMetadata(LLVMContext::MD_unpredictable)) {
+ Value *Vec;
+ const Value *BOp0, *BOp1;
+ Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0;
+ if (match(BOp, m_LogicalAnd(m_Value(BOp0), m_Value(BOp1))))
+ Opcode = Instruction::And;
+ else if (match(BOp, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
+ Opcode = Instruction::Or;
+
+ if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
+ match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
+ FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode,
getEdgeProbability(BrMBB, Succ0MBB),
getEdgeProbability(BrMBB, Succ1MBB),
/*InvertCond=*/false);
@@ -2551,7 +2549,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
SDLoc dl = getCurSDLoc();
SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
const Module &M = *ParentBB->getParent()->getFunction().getParent();
- unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext()));
+ Align Align = DL->getPrefTypeAlign(Type::getInt8PtrTy(M.getContext()));
// Generate code to load the content of the guard slot.
SDValue GuardVal = DAG.getLoad(
@@ -2809,7 +2807,7 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
case Intrinsic::experimental_gc_statepoint:
LowerStatepoint(cast<GCStatepointInst>(I), EHPadBB);
break;
- case Intrinsic::wasm_rethrow_in_catch: {
+ case Intrinsic::wasm_rethrow: {
// This is usually done in visitTargetIntrinsic, but this intrinsic is
// special because it can be invoked, so we manually lower it to a DAG
// node here.
@@ -2817,7 +2815,7 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
Ops.push_back(getRoot()); // inchain
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
Ops.push_back(
- DAG.getTargetConstant(Intrinsic::wasm_rethrow_in_catch, getCurSDLoc(),
+ DAG.getTargetConstant(Intrinsic::wasm_rethrow, getCurSDLoc(),
TLI.getPointerTy(DAG.getDataLayout())));
SDVTList VTs = DAG.getVTList(ArrayRef<EVT>({MVT::Other})); // outchain
DAG.setRoot(DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops));
@@ -2999,20 +2997,6 @@ void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
}
-void SelectionDAGBuilder::visitFSub(const User &I) {
- // -0.0 - X --> fneg
- Type *Ty = I.getType();
- if (isa<Constant>(I.getOperand(0)) &&
- I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) {
- SDValue Op2 = getValue(I.getOperand(1));
- setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(),
- Op2.getValueType(), Op2));
- return;
- }
-
- visitBinary(I, ISD::FSUB);
-}
-
void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) {
SDNodeFlags Flags;
@@ -3028,9 +3012,10 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
Flags.setNoSignedWrap(OFBinOp->hasNoSignedWrap());
Flags.setNoUnsignedWrap(OFBinOp->hasNoUnsignedWrap());
}
- if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I)) {
+ if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I))
Flags.setExact(ExactOp->isExact());
- }
+ if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
+ Flags.copyFMF(*FPOp);
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
@@ -3140,10 +3125,14 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {
SDValue Op2 = getValue(I.getOperand(1));
ISD::CondCode Condition = getFCmpCondCode(predicate);
- auto *FPMO = dyn_cast<FPMathOperator>(&I);
- if ((FPMO && FPMO->hasNoNaNs()) || TM.Options.NoNaNsFPMath)
+ auto *FPMO = cast<FPMathOperator>(&I);
+ if (FPMO->hasNoNaNs() || TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
+ SDNodeFlags Flags;
+ Flags.copyFMF(*FPMO);
+ SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
+
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
@@ -3173,6 +3162,11 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT;
bool IsUnaryAbs = false;
+ bool Negate = false;
+
+ SDNodeFlags Flags;
+ if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
+ Flags.copyFMF(*FPOp);
// Min/max matching is only viable if all output VTs are the same.
if (is_splat(ValueVTs)) {
@@ -3233,12 +3227,13 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
break;
}
break;
+ case SPF_NABS:
+ Negate = true;
+ LLVM_FALLTHROUGH;
case SPF_ABS:
IsUnaryAbs = true;
Opc = ISD::ABS;
break;
- case SPF_NABS:
- // TODO: we need to produce sub(0, abs(X)).
default: break;
}
@@ -3265,10 +3260,13 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
if (IsUnaryAbs) {
for (unsigned i = 0; i != NumValues; ++i) {
+ SDLoc dl = getCurSDLoc();
+ EVT VT = LHSVal.getNode()->getValueType(LHSVal.getResNo() + i);
Values[i] =
- DAG.getNode(OpCode, getCurSDLoc(),
- LHSVal.getNode()->getValueType(LHSVal.getResNo() + i),
- SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
+ DAG.getNode(OpCode, dl, VT, LHSVal.getValue(LHSVal.getResNo() + i));
+ if (Negate)
+ Values[i] = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT),
+ Values[i]);
}
} else {
for (unsigned i = 0; i != NumValues; ++i) {
@@ -3277,7 +3275,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
Values[i] = DAG.getNode(
OpCode, getCurSDLoc(),
- LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops);
+ LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops, Flags);
}
}
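SPF_NABS now falls through to the abs lowering and negates the result, i.e. nabs(x) = sub(0, abs(x)). A minimal i32 sketch with wrapping semantics (illustrative):

#include <cstdint>

static int32_t nabsSketch(int32_t X) {
  // Unsigned arithmetic keeps the INT32_MIN case well defined, matching the
  // wrapping behavior of the ISD nodes.
  uint32_t A = X < 0 ? 0u - (uint32_t)X : (uint32_t)X; // ISD::ABS
  return (int32_t)(0u - A);                            // ISD::SUB(0, Abs)
}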
@@ -3419,7 +3417,7 @@ void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
unsigned SrcAS = SV->getType()->getPointerAddressSpace();
unsigned DestAS = I.getType()->getPointerAddressSpace();
- if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
+ if (!TM.isNoopAddrSpaceCast(SrcAS, DestAS))
N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS);
setValue(&I, N);
@@ -3747,20 +3745,18 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
SDValue N = getValue(Op0);
SDLoc dl = getCurSDLoc();
auto &TLI = DAG.getTargetLoweringInfo();
- MVT PtrTy = TLI.getPointerTy(DAG.getDataLayout(), AS);
- MVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout(), AS);
// Normalize Vector GEP - all scalar operands should be converted to the
// splat vector.
bool IsVectorGEP = I.getType()->isVectorTy();
ElementCount VectorElementCount =
IsVectorGEP ? cast<VectorType>(I.getType())->getElementCount()
- : ElementCount(0, false);
+ : ElementCount::getFixed(0);
if (IsVectorGEP && !N.getValueType().isVector()) {
LLVMContext &Context = *DAG.getContext();
EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorElementCount);
- if (VectorElementCount.Scalable)
+ if (VectorElementCount.isScalable())
N = DAG.getSplatVector(VT, dl, N);
else
N = DAG.getSplatBuildVector(VT, dl, N);
@@ -3833,7 +3829,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
if (!IdxN.getValueType().isVector() && IsVectorGEP) {
EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(),
VectorElementCount);
- if (VectorElementCount.Scalable)
+ if (VectorElementCount.isScalable())
IdxN = DAG.getSplatVector(VT, dl, IdxN);
else
IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
@@ -3874,6 +3870,13 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
}
}
+ MVT PtrTy = TLI.getPointerTy(DAG.getDataLayout(), AS);
+ MVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout(), AS);
+ if (IsVectorGEP) {
+ PtrTy = MVT::getVectorVT(PtrTy, VectorElementCount);
+ PtrMemTy = MVT::getVectorVT(PtrMemTy, VectorElementCount);
+ }
+
if (PtrMemTy != PtrTy && !cast<GEPOperator>(I).isInBounds())
N = DAG.getPtrExtendInReg(N, dl, PtrMemTy);
@@ -4170,7 +4173,8 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
Root = Chain;
ChainI = 0;
}
- SDValue Add = DAG.getMemBasePlusOffset(Ptr, Offsets[i], dl, Flags);
+ SDValue Add =
+ DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(Offsets[i]), dl, Flags);
SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i);
if (MemVTs[i] != ValueVTs[i])
Val = DAG.getPtrExtOrTrunc(Val, dl, MemVTs[i]);
@@ -4332,12 +4336,12 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
- IndexType = ISD::SIGNED_SCALED;
+ IndexType = ISD::SIGNED_UNSCALED;
Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
}
SDValue Ops[] = { getMemoryRoot(), Src0, Mask, Base, Index, Scale };
SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
- Ops, MMO, IndexType);
+ Ops, MMO, IndexType, false);
DAG.setRoot(Scatter);
setValue(&I, Scatter);
}
@@ -4385,7 +4389,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
// Do not serialize masked loads of constant memory with anything.
MemoryLocation ML;
if (VT.isScalableVector())
- ML = MemoryLocation(PtrOperand);
+ ML = MemoryLocation::getAfter(PtrOperand);
else
ML = MemoryLocation(PtrOperand, LocationSize::precise(
DAG.getDataLayout().getTypeStoreSize(I.getType())),
@@ -4443,12 +4447,12 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
- IndexType = ISD::SIGNED_SCALED;
+ IndexType = ISD::SIGNED_UNSCALED;
Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
}
SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale };
SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
- Ops, MMO, IndexType);
+ Ops, MMO, IndexType, ISD::NON_EXTLOAD);
PendingLoads.push_back(Gather.getValue(1));
setValue(&I, Gather);
@@ -4875,7 +4879,7 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl,
/// expandExp - Lower an exp intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
- const TargetLowering &TLI) {
+ const TargetLowering &TLI, SDNodeFlags Flags) {
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
@@ -4891,13 +4895,13 @@ static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
}
// No special expansion.
- return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op);
+ return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op, Flags);
}
/// expandLog - Lower a log intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
- const TargetLowering &TLI) {
+ const TargetLowering &TLI, SDNodeFlags Flags) {
// TODO: What fast-math-flags should be set on the floating-point nodes?
if (Op.getValueType() == MVT::f32 &&
@@ -4990,13 +4994,13 @@ static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
}
// No special expansion.
- return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op);
+ return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op, Flags);
}
/// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
- const TargetLowering &TLI) {
+ const TargetLowering &TLI, SDNodeFlags Flags) {
// TODO: What fast-math-flags should be set on the floating-point nodes?
if (Op.getValueType() == MVT::f32 &&
@@ -5087,13 +5091,13 @@ static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
}
// No special expansion.
- return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op);
+ return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op, Flags);
}
/// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
- const TargetLowering &TLI) {
+ const TargetLowering &TLI, SDNodeFlags Flags) {
// TODO: What fast-math-flags should be set on the floating-point nodes?
if (Op.getValueType() == MVT::f32 &&
@@ -5177,25 +5181,26 @@ static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
}
// No special expansion.
- return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op);
+ return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op, Flags);
}
/// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
- const TargetLowering &TLI) {
+ const TargetLowering &TLI, SDNodeFlags Flags) {
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18)
return getLimitedPrecisionExp2(Op, dl, DAG);
// No special expansion.
- return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op);
+ return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op, Flags);
}
/// visitPow - Lower a pow intrinsic. Handles the special sequences for
/// limited-precision mode with x == 10.0f.
static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
- SelectionDAG &DAG, const TargetLowering &TLI) {
+ SelectionDAG &DAG, const TargetLowering &TLI,
+ SDNodeFlags Flags) {
bool IsExp10 = false;
if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
@@ -5218,7 +5223,7 @@ static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
}
// No special expansion.
- return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS, Flags);
}
/// ExpandPowI - Expand a llvm.powi intrinsic.
@@ -5343,7 +5348,7 @@ static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL,
// getUnderlyingArgRegs - Find underlying registers used for a truncated,
// bitcasted, or split argument. Returns a list of <Register, size in bits>
static void
-getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, unsigned>> &Regs,
+getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, TypeSize>> &Regs,
const SDValue &N) {
switch (N.getOpcode()) {
case ISD::CopyFromReg: {
@@ -5454,7 +5459,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
if (FI != std::numeric_limits<int>::max())
Op = MachineOperand::CreateFI(FI);
- SmallVector<std::pair<unsigned, unsigned>, 8> ArgRegsAndSizes;
+ SmallVector<std::pair<unsigned, TypeSize>, 8> ArgRegsAndSizes;
if (!Op && N.getNode()) {
getUnderlyingArgRegs(ArgRegsAndSizes, N);
Register Reg;
@@ -5484,8 +5489,8 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
if (!Op) {
// Create a DBG_VALUE for each decomposed value in ArgRegs to cover Reg
- auto splitMultiRegDbgValue
- = [&](ArrayRef<std::pair<unsigned, unsigned>> SplitRegs) {
+ auto splitMultiRegDbgValue = [&](ArrayRef<std::pair<unsigned, TypeSize>>
+ SplitRegs) {
unsigned Offset = 0;
for (auto RegAndSize : SplitRegs) {
// If the expression is already a fragment, the current register
@@ -5639,6 +5644,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
DebugLoc dl = getCurDebugLoc();
SDValue Res;
+ SDNodeFlags Flags;
+ if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
+ Flags.copyFMF(*FPOp);
+
switch (Intrinsic) {
default:
// By default, turn this into a target intrinsic node.
@@ -6053,23 +6062,26 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
getValue(I.getArgOperand(1)), DAG));
return;
case Intrinsic::log:
- setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
+ setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
return;
case Intrinsic::log2:
- setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
+ setValue(&I,
+ expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
return;
case Intrinsic::log10:
- setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
+ setValue(&I,
+ expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
return;
case Intrinsic::exp:
- setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
+ setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
return;
case Intrinsic::exp2:
- setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
+ setValue(&I,
+ expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
return;
case Intrinsic::pow:
setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1)), DAG, TLI));
+ getValue(I.getArgOperand(1)), DAG, TLI, Flags));
return;
case Intrinsic::sqrt:
case Intrinsic::fabs:
@@ -6102,7 +6114,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
setValue(&I, DAG.getNode(Opcode, sdl,
getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0))));
+ getValue(I.getArgOperand(0)), Flags));
return;
}
case Intrinsic::lround:
@@ -6127,44 +6139,47 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
setValue(&I, DAG.getNode(ISD::FMINNUM, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1))));
+ getValue(I.getArgOperand(1)), Flags));
return;
case Intrinsic::maxnum:
setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1))));
+ getValue(I.getArgOperand(1)), Flags));
return;
case Intrinsic::minimum:
setValue(&I, DAG.getNode(ISD::FMINIMUM, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1))));
+ getValue(I.getArgOperand(1)), Flags));
return;
case Intrinsic::maximum:
setValue(&I, DAG.getNode(ISD::FMAXIMUM, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1))));
+ getValue(I.getArgOperand(1)), Flags));
return;
case Intrinsic::copysign:
setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1))));
+ getValue(I.getArgOperand(1)), Flags));
return;
case Intrinsic::fma:
- setValue(&I, DAG.getNode(ISD::FMA, sdl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1)),
- getValue(I.getArgOperand(2))));
+ setValue(&I, DAG.getNode(
+ ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2)), Flags));
return;
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
return;
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
+#include "llvm/IR/VPIntrinsics.def"
+ visitVectorPredicationIntrinsic(cast<VPIntrinsic>(I));
+ return;
case Intrinsic::fmuladd: {
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
@@ -6173,17 +6188,15 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)),
- getValue(I.getArgOperand(2))));
+ getValue(I.getArgOperand(2)), Flags));
} else {
// TODO: Intrinsic calls should have fast-math-flags.
- SDValue Mul = DAG.getNode(ISD::FMUL, sdl,
- getValue(I.getArgOperand(0)).getValueType(),
- getValue(I.getArgOperand(0)),
- getValue(I.getArgOperand(1)));
+ SDValue Mul = DAG.getNode(
+ ISD::FMUL, sdl, getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), Flags);
SDValue Add = DAG.getNode(ISD::FADD, sdl,
getValue(I.getArgOperand(0)).getValueType(),
- Mul,
- getValue(I.getArgOperand(2)));
+ Mul, getValue(I.getArgOperand(2)), Flags);
setValue(&I, Add);
}
return;
@@ -6201,6 +6214,20 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
DAG.getNode(ISD::BITCAST, sdl, MVT::f16,
getValue(I.getArgOperand(0)))));
return;
+ case Intrinsic::fptosi_sat: {
+ EVT Type = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ SDValue SatW = DAG.getConstant(Type.getScalarSizeInBits(), sdl, MVT::i32);
+ setValue(&I, DAG.getNode(ISD::FP_TO_SINT_SAT, sdl, Type,
+ getValue(I.getArgOperand(0)), SatW));
+ return;
+ }
+ case Intrinsic::fptoui_sat: {
+ EVT Type = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ SDValue SatW = DAG.getConstant(Type.getScalarSizeInBits(), sdl, MVT::i32);
+ setValue(&I, DAG.getNode(ISD::FP_TO_UINT_SAT, sdl, Type,
+ getValue(I.getArgOperand(0)), SatW));
+ return;
+ }
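The new fptosi.sat/fptoui.sat cases lower to FP_TO_SINT_SAT/FP_TO_UINT_SAT with the saturation width passed as an i32 operand. A sketch of the signed i32 semantics being requested, per the intrinsic's definition (clamp to range, NaN maps to zero; illustrative):

#include <cmath>
#include <cstdint>

static int32_t fptosiSat32(double X) {
  if (std::isnan(X))
    return 0;
  if (X <= (double)INT32_MIN)
    return INT32_MIN;
  if (X >= (double)INT32_MAX)
    return INT32_MAX;
  return (int32_t)X; // in range: ordinary truncation toward zero
}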
case Intrinsic::pcmarker: {
SDValue Tmp = getValue(I.getArgOperand(0));
DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp));
@@ -6253,62 +6280,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDValue Y = getValue(I.getArgOperand(1));
SDValue Z = getValue(I.getArgOperand(2));
EVT VT = X.getValueType();
- SDValue BitWidthC = DAG.getConstant(VT.getScalarSizeInBits(), sdl, VT);
- SDValue Zero = DAG.getConstant(0, sdl, VT);
- SDValue ShAmt = DAG.getNode(ISD::UREM, sdl, VT, Z, BitWidthC);
- auto FunnelOpcode = IsFSHL ? ISD::FSHL : ISD::FSHR;
- if (TLI.isOperationLegalOrCustom(FunnelOpcode, VT)) {
- setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z));
- return;
- }
-
- // When X == Y, this is rotate. If the data type has a power-of-2 size, we
- // avoid the select that is necessary in the general case to filter out
- // the 0-shift possibility that leads to UB.
- if (X == Y && isPowerOf2_32(VT.getScalarSizeInBits())) {
+ if (X == Y) {
auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR;
- if (TLI.isOperationLegalOrCustom(RotateOpcode, VT)) {
- setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z));
- return;
- }
-
- // Some targets only rotate one way. Try the opposite direction.
- RotateOpcode = IsFSHL ? ISD::ROTR : ISD::ROTL;
- if (TLI.isOperationLegalOrCustom(RotateOpcode, VT)) {
- // Negate the shift amount because it is safe to ignore the high bits.
- SDValue NegShAmt = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z);
- setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, NegShAmt));
- return;
- }
-
- // fshl (rotl): (X << (Z % BW)) | (X >> ((0 - Z) % BW))
- // fshr (rotr): (X << ((0 - Z) % BW)) | (X >> (Z % BW))
- SDValue NegZ = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z);
- SDValue NShAmt = DAG.getNode(ISD::UREM, sdl, VT, NegZ, BitWidthC);
- SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : NShAmt);
- SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, X, IsFSHL ? NShAmt : ShAmt);
- setValue(&I, DAG.getNode(ISD::OR, sdl, VT, ShX, ShY));
- return;
+ setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z));
+ } else {
+ auto FunnelOpcode = IsFSHL ? ISD::FSHL : ISD::FSHR;
+ setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z));
}
-
- // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
- // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
- SDValue InvShAmt = DAG.getNode(ISD::SUB, sdl, VT, BitWidthC, ShAmt);
- SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : InvShAmt);
- SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, Y, IsFSHL ? InvShAmt : ShAmt);
- SDValue Or = DAG.getNode(ISD::OR, sdl, VT, ShX, ShY);
-
- // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth,
- // and that is undefined. We must compare and select to avoid UB.
- EVT CCVT = MVT::i1;
- if (VT.isVector())
- CCVT = EVT::getVectorVT(*Context, CCVT, VT.getVectorNumElements());
-
- // For fshl, 0-shift returns the 1st arg (X).
- // For fshr, 0-shift returns the 2nd arg (Y).
- SDValue IsZeroShift = DAG.getSetCC(sdl, CCVT, ShAmt, Zero, ISD::SETEQ);
- setValue(&I, DAG.getSelect(sdl, VT, IsZeroShift, IsFSHL ? X : Y, Or));
return;
}
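The funnel-shift case is now a straight choice between ROTL/ROTR (when X == Y) and FSHL/FSHR, with the manual fallbacks above deleted in favor of generic expansion. A sketch of the semantics for 32 bits (illustrative):

#include <cstdint>

// fshl concatenates X:Y and returns the high 32 bits after shifting left by
// Z % 32; a zero shift amount returns X unchanged.
static uint32_t fshl32(uint32_t X, uint32_t Y, uint32_t Z) {
  uint32_t S = Z % 32;
  if (S == 0)
    return X;
  return (X << S) | (Y >> (32 - S));
}

// With both inputs equal, the funnel shift degenerates to a rotate, which is
// what the new code emits directly as ISD::ROTL / ISD::ROTR.
static uint32_t rotl32(uint32_t X, uint32_t Z) { return fshl32(X, X, Z); }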
case Intrinsic::sadd_sat: {
@@ -6335,6 +6314,18 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
setValue(&I, DAG.getNode(ISD::USUBSAT, sdl, Op1.getValueType(), Op1, Op2));
return;
}
+ case Intrinsic::sshl_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::SSHLSAT, sdl, Op1.getValueType(), Op1, Op2));
+ return;
+ }
+ case Intrinsic::ushl_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::USHLSAT, sdl, Op1.getValueType(), Op1, Op2));
+ return;
+ }
case Intrinsic::smul_fix:
case Intrinsic::umul_fix:
case Intrinsic::smul_fix_sat:
@@ -6357,6 +6348,36 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
Op1, Op2, Op3, DAG, TLI));
return;
}
+ case Intrinsic::smax: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::SMAX, sdl, Op1.getValueType(), Op1, Op2));
+ return;
+ }
+ case Intrinsic::smin: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::SMIN, sdl, Op1.getValueType(), Op1, Op2));
+ return;
+ }
+ case Intrinsic::umax: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::UMAX, sdl, Op1.getValueType(), Op1, Op2));
+ return;
+ }
+ case Intrinsic::umin: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ setValue(&I, DAG.getNode(ISD::UMIN, sdl, Op1.getValueType(), Op1, Op2));
+ return;
+ }
+ case Intrinsic::abs: {
+ // TODO: Preserve "int min is poison" arg in SDAG?
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::ABS, sdl, Op1.getValueType(), Op1));
+ return;
+ }
case Intrinsic::stacksave: {
SDValue Op = getRoot();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
@@ -6375,7 +6396,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
// Result type for @llvm.get.dynamic.area.offset should match PtrTy for
// target.
- if (PtrTy.getSizeInBits() < ResTy.getSizeInBits())
+ if (PtrTy.getFixedSizeInBits() < ResTy.getFixedSizeInBits())
report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset"
" intrinsic!");
Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy),
@@ -6393,7 +6414,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
} else {
EVT PtrTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
const Value *Global = TLI.getSDagStackGuard(M);
- unsigned Align = DL->getPrefTypeAlignment(Global->getType());
+ Align Align = DL->getPrefTypeAlign(Global->getType());
Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global),
MachinePointerInfo(Global, 0), Align,
MachineMemOperand::MOVolatile);
@@ -6424,9 +6445,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
// Store the stack protector onto the stack.
- Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack(
- DAG.getMachineFunction(), FI),
- /* Alignment = */ 0, MachineMemOperand::MOVolatile);
+ Res = DAG.getStore(
+ Chain, sdl, Src, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
+ MaybeAlign(), MachineMemOperand::MOVolatile);
setValue(&I, Res);
DAG.setRoot(Res);
return;
@@ -6444,10 +6466,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// Drop the intrinsic, but forward the value
setValue(&I, getValue(I.getOperand(0)));
return;
+
case Intrinsic::assume:
+ case Intrinsic::experimental_noalias_scope_decl:
case Intrinsic::var_annotation:
case Intrinsic::sideeffect:
- // Discard annotate attributes, assumptions, and artificial side-effects.
+ // Discard annotate attributes, noalias scope declarations, assumptions, and
+ // artificial side-effects.
return;
case Intrinsic::codeview_annotation: {
@@ -6508,6 +6533,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
setValue(&I, getValue(I.getArgOperand(0)));
return;
+ case Intrinsic::ubsantrap:
case Intrinsic::debugtrap:
case Intrinsic::trap: {
StringRef TrapFuncName =
@@ -6515,12 +6541,31 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
.getAttribute(AttributeList::FunctionIndex, "trap-func-name")
.getValueAsString();
if (TrapFuncName.empty()) {
- ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ?
- ISD::TRAP : ISD::DEBUGTRAP;
- DAG.setRoot(DAG.getNode(Op, sdl,MVT::Other, getRoot()));
+ switch (Intrinsic) {
+ case Intrinsic::trap:
+ DAG.setRoot(DAG.getNode(ISD::TRAP, sdl, MVT::Other, getRoot()));
+ break;
+ case Intrinsic::debugtrap:
+ DAG.setRoot(DAG.getNode(ISD::DEBUGTRAP, sdl, MVT::Other, getRoot()));
+ break;
+ case Intrinsic::ubsantrap:
+ DAG.setRoot(DAG.getNode(
+ ISD::UBSANTRAP, sdl, MVT::Other, getRoot(),
+ DAG.getTargetConstant(
+ cast<ConstantInt>(I.getArgOperand(0))->getZExtValue(), sdl,
+ MVT::i32)));
+ break;
+ default: llvm_unreachable("unknown trap intrinsic");
+ }
return;
}
TargetLowering::ArgListTy Args;
+ if (Intrinsic == Intrinsic::ubsantrap) {
+ Args.push_back(TargetLoweringBase::ArgListEntry());
+ Args[0].Val = I.getArgOperand(0);
+ Args[0].Node = getValue(Args[0].Val);
+ Args[0].Ty = Args[0].Val->getType();
+ }
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
@@ -6557,7 +6602,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
EVT OverflowVT = MVT::i1;
if (ResultVT.isVector())
OverflowVT = EVT::getVectorVT(
- *Context, OverflowVT, ResultVT.getVectorNumElements());
+ *Context, OverflowVT, ResultVT.getVectorElementCount());
SDVTList VTs = DAG.getVTList(ResultVT, OverflowVT);
setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2));
@@ -6595,7 +6640,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
cast<ConstantInt>(I.getArgOperand(0))->getSExtValue();
Value *const ObjectPtr = I.getArgOperand(1);
SmallVector<const Value *, 4> Allocas;
- GetUnderlyingObjects(ObjectPtr, Allocas, *DL);
+ getUnderlyingObjects(ObjectPtr, Allocas);
for (SmallVectorImpl<const Value*>::iterator Object = Allocas.begin(),
E = Allocas.end(); Object != E; ++Object) {
@@ -6622,6 +6667,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
}
return;
}
+ case Intrinsic::pseudoprobe: {
+ auto Guid = cast<ConstantInt>(I.getArgOperand(0))->getZExtValue();
+ auto Index = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
+ auto Attr = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
+ Res = DAG.getPseudoProbeNode(sdl, getRoot(), Guid, Index, Attr);
+ DAG.setRoot(Res);
+ return;
+ }
case Intrinsic::invariant_start:
// Discard region information.
setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout())));
@@ -6732,7 +6785,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// specific calling convention, and only for x86_64.
// FIXME: Support other platforms later.
const auto &Triple = DAG.getTarget().getTargetTriple();
- if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+ if (Triple.getArch() != Triple::x86_64)
return;
SDLoc DL = getCurSDLoc();
@@ -6763,7 +6816,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// specific calling convention, and only for x86_64.
// FIXME: Support other platforms later.
const auto &Triple = DAG.getTarget().getTargetTriple();
- if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+ if (Triple.getArch() != Triple::x86_64)
return;
SDLoc DL = getCurSDLoc();
@@ -6797,19 +6850,19 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
LowerDeoptimizeCall(&I);
return;
- case Intrinsic::experimental_vector_reduce_v2_fadd:
- case Intrinsic::experimental_vector_reduce_v2_fmul:
- case Intrinsic::experimental_vector_reduce_add:
- case Intrinsic::experimental_vector_reduce_mul:
- case Intrinsic::experimental_vector_reduce_and:
- case Intrinsic::experimental_vector_reduce_or:
- case Intrinsic::experimental_vector_reduce_xor:
- case Intrinsic::experimental_vector_reduce_smax:
- case Intrinsic::experimental_vector_reduce_smin:
- case Intrinsic::experimental_vector_reduce_umax:
- case Intrinsic::experimental_vector_reduce_umin:
- case Intrinsic::experimental_vector_reduce_fmax:
- case Intrinsic::experimental_vector_reduce_fmin:
+ case Intrinsic::vector_reduce_fadd:
+ case Intrinsic::vector_reduce_fmul:
+ case Intrinsic::vector_reduce_add:
+ case Intrinsic::vector_reduce_mul:
+ case Intrinsic::vector_reduce_and:
+ case Intrinsic::vector_reduce_or:
+ case Intrinsic::vector_reduce_xor:
+ case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_smin:
+ case Intrinsic::vector_reduce_umax:
+ case Intrinsic::vector_reduce_umin:
+ case Intrinsic::vector_reduce_fmax:
+ case Intrinsic::vector_reduce_fmin:
visitVectorReduce(I, Intrinsic);
return;
@@ -6897,36 +6950,57 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::get_active_lane_mask: {
auto DL = getCurSDLoc();
SDValue Index = getValue(I.getOperand(0));
- SDValue BTC = getValue(I.getOperand(1));
+ SDValue TripCount = getValue(I.getOperand(1));
Type *ElementTy = I.getOperand(0)->getType();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
unsigned VecWidth = VT.getVectorNumElements();
- SmallVector<SDValue, 16> OpsBTC;
+ SmallVector<SDValue, 16> OpsTripCount;
SmallVector<SDValue, 16> OpsIndex;
SmallVector<SDValue, 16> OpsStepConstants;
for (unsigned i = 0; i < VecWidth; i++) {
- OpsBTC.push_back(BTC);
+ OpsTripCount.push_back(TripCount);
OpsIndex.push_back(Index);
- OpsStepConstants.push_back(DAG.getConstant(i, DL, MVT::getVT(ElementTy)));
+ OpsStepConstants.push_back(
+ DAG.getConstant(i, DL, EVT::getEVT(ElementTy)));
}
- EVT CCVT = MVT::i1;
- CCVT = EVT::getVectorVT(I.getContext(), CCVT, VecWidth);
+ EVT CCVT = EVT::getVectorVT(I.getContext(), MVT::i1, VecWidth);
- auto VecTy = MVT::getVT(FixedVectorType::get(ElementTy, VecWidth));
+ auto VecTy = EVT::getEVT(FixedVectorType::get(ElementTy, VecWidth));
SDValue VectorIndex = DAG.getBuildVector(VecTy, DL, OpsIndex);
SDValue VectorStep = DAG.getBuildVector(VecTy, DL, OpsStepConstants);
SDValue VectorInduction = DAG.getNode(
ISD::UADDO, DL, DAG.getVTList(VecTy, CCVT), VectorIndex, VectorStep);
- SDValue VectorBTC = DAG.getBuildVector(VecTy, DL, OpsBTC);
+ SDValue VectorTripCount = DAG.getBuildVector(VecTy, DL, OpsTripCount);
SDValue SetCC = DAG.getSetCC(DL, CCVT, VectorInduction.getValue(0),
- VectorBTC, ISD::CondCode::SETULE);
+ VectorTripCount, ISD::CondCode::SETULT);
setValue(&I, DAG.getNode(ISD::AND, DL, CCVT,
DAG.getNOT(DL, VectorInduction.getValue(1), CCVT),
SetCC));
return;
}
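The lane-mask expansion now compares against the trip count with SETULT (rather than a backedge-taken count with SETULE) and still masks out lanes whose index computation overflowed. A scalar sketch of the lane predicate for a 4-wide mask (illustrative):

#include <cstdint>

static void activeLaneMask4(uint64_t Index, uint64_t TripCount, bool Mask[4]) {
  for (unsigned i = 0; i < 4; ++i) {
    uint64_t Sum = Index + i;               // ISD::UADDO result
    bool Overflow = Sum < Index;            // UADDO overflow bit
    Mask[i] = !Overflow && Sum < TripCount; // AND(NOT(ovf), SETULT)
  }
}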
+ case Intrinsic::experimental_vector_insert: {
+ auto DL = getCurSDLoc();
+
+ SDValue Vec = getValue(I.getOperand(0));
+ SDValue SubVec = getValue(I.getOperand(1));
+ SDValue Index = getValue(I.getOperand(2));
+ EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResultVT, Vec, SubVec,
+ Index));
+ return;
+ }
+ case Intrinsic::experimental_vector_extract: {
+ auto DL = getCurSDLoc();
+
+ SDValue Vec = getValue(I.getOperand(0));
+ SDValue Index = getValue(I.getOperand(1));
+ EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+
+ setValue(&I, DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, Index));
+ return;
+ }
}
}
@@ -7042,6 +7116,41 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
setValue(&FPI, FPResult);
}
+static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
+ Optional<unsigned> ResOPC;
+ switch (VPIntrin.getIntrinsicID()) {
+#define BEGIN_REGISTER_VP_INTRINSIC(INTRIN, ...) case Intrinsic::INTRIN:
+#define BEGIN_REGISTER_VP_SDNODE(VPSDID, ...) ResOPC = ISD::VPSDID;
+#define END_REGISTER_VP_INTRINSIC(...) break;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+
+ if (!ResOPC.hasValue())
+ llvm_unreachable(
+ "Inconsistency: no SDNode available for this VPIntrinsic!");
+
+ return ResOPC.getValue();
+}
+
+void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
+ const VPIntrinsic &VPIntrin) {
+ unsigned Opcode = getISDForVPIntrinsic(VPIntrin);
+
+ SmallVector<EVT, 4> ValueVTs;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ ComputeValueVTs(TLI, DAG.getDataLayout(), VPIntrin.getType(), ValueVTs);
+ SDVTList VTs = DAG.getVTList(ValueVTs);
+
+ // Request operands.
+ SmallVector<SDValue, 7> OpValues;
+ for (int i = 0; i < (int)VPIntrin.getNumArgOperands(); ++i)
+ OpValues.push_back(getValue(VPIntrin.getArgOperand(i)));
+
+ SDLoc DL = getCurSDLoc();
+ SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues);
+ setValue(&VPIntrin, Result);
+}
+
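getISDForVPIntrinsic leans on the X-macro expansion of VPIntrinsics.def: each BEGIN_REGISTER_VP_INTRINSIC becomes a case label, each BEGIN_REGISTER_VP_SDNODE an assignment, and END_REGISTER_VP_INTRINSIC a break, so the two tables cannot drift apart. Written out by hand with toy IDs (illustrative values, not the real enums):

#include <optional>

enum ToyIntrinsic { vp_add = 1, vp_mul = 2 };
enum ToyISD { VP_ADD = 101, VP_MUL = 102 };

static std::optional<unsigned> toyISDForVP(unsigned ID) {
  std::optional<unsigned> ResOPC;
  switch (ID) {
  case vp_add:       // BEGIN_REGISTER_VP_INTRINSIC
    ResOPC = VP_ADD; // BEGIN_REGISTER_VP_SDNODE
    break;           // END_REGISTER_VP_INTRINSIC
  case vp_mul:
    ResOPC = VP_MUL;
    break;
  }
  return ResOPC; // empty: no SDNode registered for this intrinsic
}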
std::pair<SDValue, SDValue>
SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
const BasicBlock *EHPadBB) {
@@ -7258,9 +7367,9 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
}
SDValue Ptr = Builder.getValue(PtrVal);
- SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root,
- Ptr, MachinePointerInfo(PtrVal),
- /* Alignment = */ 1);
+ SDValue LoadVal =
+ Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root, Ptr,
+ MachinePointerInfo(PtrVal), Align(1));
if (!ConstantMemory)
Builder.PendingLoads.push_back(LoadVal.getValue(1));
@@ -7281,12 +7390,12 @@ void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
setValue(&I, Value);
}
-/// See if we can lower a memcmp call into an optimized form. If so, return
+/// See if we can lower a memcmp/bcmp call into an optimized form. If so, return
/// true and lower it. Otherwise return false, and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
-bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
+bool SelectionDAGBuilder::visitMemCmpBCmpCall(const CallInst &I) {
const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
const Value *Size = I.getArgOperand(2);
const ConstantInt *CSize = dyn_cast<ConstantInt>(Size);
@@ -7537,8 +7646,12 @@ bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
if (!I.onlyReadsMemory())
return false;
+ SDNodeFlags Flags;
+ Flags.copyFMF(cast<FPMathOperator>(I));
+
SDValue Tmp = getValue(I.getArgOperand(0));
- setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp));
+ setValue(&I,
+ DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp, Flags));
return true;
}
@@ -7553,10 +7666,13 @@ bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I,
if (!I.onlyReadsMemory())
return false;
+ SDNodeFlags Flags;
+ Flags.copyFMF(cast<FPMathOperator>(I));
+
SDValue Tmp0 = getValue(I.getArgOperand(0));
SDValue Tmp1 = getValue(I.getArgOperand(1));
EVT VT = Tmp0.getValueType();
- setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1));
+ setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1, Flags));
return true;
}
@@ -7590,6 +7706,10 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
LibInfo->hasOptimizedCodeGen(Func)) {
switch (Func) {
default: break;
+ case LibFunc_bcmp:
+ if (visitMemCmpBCmpCall(I))
+ return;
+ break;
case LibFunc_copysign:
case LibFunc_copysignf:
case LibFunc_copysignl:
@@ -7691,7 +7811,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
return;
break;
case LibFunc_memcmp:
- if (visitMemCmpCall(I))
+ if (visitMemCmpBCmpCall(I))
return;
break;
case LibFunc_mempcpy:
@@ -8111,10 +8231,9 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call) {
OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
}
- OpInfo.ConstraintVT =
- OpInfo
- .getCallOperandValEVT(*DAG.getContext(), TLI, DAG.getDataLayout())
- .getSimpleVT();
+ EVT VT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI,
+ DAG.getDataLayout());
+ OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
} else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) {
// The return value of the call is this value. As such, there is no
// corresponding argument.
@@ -8376,7 +8495,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call) {
InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
AsmNodeOperands.push_back(DAG.getTargetConstant(
ResOpType, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
- AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
+ llvm::append_range(AsmNodeOperands, Ops);
break;
}
@@ -8956,57 +9075,59 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
SDLoc dl = getCurSDLoc();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
SDValue Res;
- FastMathFlags FMF;
- if (isa<FPMathOperator>(I))
- FMF = I.getFastMathFlags();
+ SDNodeFlags SDFlags;
+ if (auto *FPMO = dyn_cast<FPMathOperator>(&I))
+ SDFlags.copyFMF(*FPMO);
switch (Intrinsic) {
- case Intrinsic::experimental_vector_reduce_v2_fadd:
- if (FMF.allowReassoc())
+ case Intrinsic::vector_reduce_fadd:
+ if (SDFlags.hasAllowReassociation())
Res = DAG.getNode(ISD::FADD, dl, VT, Op1,
- DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2));
+ DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2, SDFlags),
+ SDFlags);
else
- Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2);
+ Res = DAG.getNode(ISD::VECREDUCE_SEQ_FADD, dl, VT, Op1, Op2, SDFlags);
break;
- case Intrinsic::experimental_vector_reduce_v2_fmul:
- if (FMF.allowReassoc())
+ case Intrinsic::vector_reduce_fmul:
+ if (SDFlags.hasAllowReassociation())
Res = DAG.getNode(ISD::FMUL, dl, VT, Op1,
- DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2));
+ DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2, SDFlags),
+ SDFlags);
else
- Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2);
+ Res = DAG.getNode(ISD::VECREDUCE_SEQ_FMUL, dl, VT, Op1, Op2, SDFlags);
break;
- case Intrinsic::experimental_vector_reduce_add:
+ case Intrinsic::vector_reduce_add:
Res = DAG.getNode(ISD::VECREDUCE_ADD, dl, VT, Op1);
break;
- case Intrinsic::experimental_vector_reduce_mul:
+ case Intrinsic::vector_reduce_mul:
Res = DAG.getNode(ISD::VECREDUCE_MUL, dl, VT, Op1);
break;
- case Intrinsic::experimental_vector_reduce_and:
+ case Intrinsic::vector_reduce_and:
Res = DAG.getNode(ISD::VECREDUCE_AND, dl, VT, Op1);
break;
- case Intrinsic::experimental_vector_reduce_or:
+ case Intrinsic::vector_reduce_or:
Res = DAG.getNode(ISD::VECREDUCE_OR, dl, VT, Op1);
break;
- case Intrinsic::experimental_vector_reduce_xor:
+ case Intrinsic::vector_reduce_xor:
Res = DAG.getNode(ISD::VECREDUCE_XOR, dl, VT, Op1);
break;
- case Intrinsic::experimental_vector_reduce_smax:
+ case Intrinsic::vector_reduce_smax:
Res = DAG.getNode(ISD::VECREDUCE_SMAX, dl, VT, Op1);
break;
- case Intrinsic::experimental_vector_reduce_smin:
+ case Intrinsic::vector_reduce_smin:
Res = DAG.getNode(ISD::VECREDUCE_SMIN, dl, VT, Op1);
break;
- case Intrinsic::experimental_vector_reduce_umax:
+ case Intrinsic::vector_reduce_umax:
Res = DAG.getNode(ISD::VECREDUCE_UMAX, dl, VT, Op1);
break;
- case Intrinsic::experimental_vector_reduce_umin:
+ case Intrinsic::vector_reduce_umin:
Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1);
break;
- case Intrinsic::experimental_vector_reduce_fmax:
- Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1);
+ case Intrinsic::vector_reduce_fmax:
+ Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags);
break;
- case Intrinsic::experimental_vector_reduce_fmin:
- Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1);
+ case Intrinsic::vector_reduce_fmin:
+ Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags);
break;
default:
llvm_unreachable("Unhandled vector reduce intrinsic");
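For reference, a condensed sketch of the fadd lowering chosen above (mirroring the diff, not new behavior): reassociation permits an unordered reduce plus one scalar FADD, while the strict case keeps the in-order sequential node.

// Reassoc: fadd(Op1, vecreduce_fadd(Op2)); otherwise strict in-order reduce.
SDValue Red = SDFlags.hasAllowReassociation()
                  ? DAG.getNode(ISD::FADD, dl, VT, Op1,
                                DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2,
                                            SDFlags),
                                SDFlags)
                  : DAG.getNode(ISD::VECREDUCE_SEQ_FADD, dl, VT, Op1, Op2,
                                SDFlags);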
@@ -9093,6 +9214,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Entry.IsSRet = true;
Entry.IsNest = false;
Entry.IsByVal = false;
+ Entry.IsByRef = false;
Entry.IsReturned = false;
Entry.IsSwiftSelf = false;
Entry.IsSwiftError = false;
@@ -9213,6 +9335,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Flags.setCFGuardTarget();
if (Args[i].IsByVal)
Flags.setByVal();
+ if (Args[i].IsByRef)
+ Flags.setByRef();
if (Args[i].IsPreallocated) {
Flags.setPreallocated();
// Set the byval flag for CCAssignFn callbacks that don't know about
@@ -9418,11 +9542,33 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
return std::make_pair(Res, CLI.Chain);
}
+/// Places new result values for the node in Results (their number
+/// and types must exactly match those of the original return values of
+/// the node), or leaves Results empty, which indicates that the node is not
+/// to be custom lowered after all.
void TargetLowering::LowerOperationWrapper(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
- if (SDValue Res = LowerOperation(SDValue(N, 0), DAG))
+ SDValue Res = LowerOperation(SDValue(N, 0), DAG);
+
+ if (!Res.getNode())
+ return;
+
+ // If the original node has one result, take the return value from
+ // LowerOperation as is. It might not be result number 0.
+ if (N->getNumValues() == 1) {
Results.push_back(Res);
+ return;
+ }
+
+ // If the original node has multiple results, then the return node should
+ // have the same number of results.
+ assert((N->getNumValues() == Res->getNumValues()) &&
+ "Lowering returned the wrong number of results!");
+
+ // Place the new result values based on N's result numbers.
+ for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
+ Results.push_back(Res.getValue(I));
}
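A hypothetical target hook illustrating why the wrapper now iterates results: custom lowering may legitimately return a multi-result node (for example a value plus a chain), and each result must be forwarded by index. lowerCustomLoad is a made-up helper, not an LLVM API.

// Hypothetical LowerOperation returning a two-result node; the wrapper
// above pushes Res.getValue(0) and Res.getValue(1) into Results.
SDValue MyTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getOpcode() == ISD::LOAD)
    return lowerCustomLoad(Op, DAG); // hypothetical; returns {value, chain}
  return SDValue();                  // everything else: not custom lowered
}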
SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -9545,7 +9691,7 @@ findArgumentCopyElisionCandidates(const DataLayout &DL,
// initializes the alloca. Don't elide copies from the same argument twice.
const Value *Val = SI->getValueOperand()->stripPointerCasts();
const auto *Arg = dyn_cast<Argument>(Val);
- if (!Arg || Arg->hasPassPointeeByValueAttr() ||
+ if (!Arg || Arg->hasPassPointeeByValueCopyAttr() ||
Arg->getType()->isEmptyTy() ||
DL.getTypeStoreSize(Arg->getType()) !=
DL.getTypeAllocSize(AI->getAllocatedType()) ||
@@ -9726,6 +9872,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
Flags.setSwiftError();
if (Arg.hasAttribute(Attribute::ByVal))
Flags.setByVal();
+ if (Arg.hasAttribute(Attribute::ByRef))
+ Flags.setByRef();
if (Arg.hasAttribute(Attribute::InAlloca)) {
Flags.setInAlloca();
// Set the byval flag for CCAssignFn callbacks that don't know about
@@ -9744,27 +9892,31 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// preallocated handling in the various CC lowering callbacks.
Flags.setByVal();
}
- if (F.getCallingConv() == CallingConv::X86_INTR) {
- // IA Interrupt passes frame (1st parameter) by value in the stack.
- if (ArgNo == 0)
- Flags.setByVal();
- }
- if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) {
- Type *ElementTy = Arg.getParamByValType();
- // For ByVal, size and alignment should be passed from FE. BE will
- // guess if this info is not there but there are cases it cannot get
- // right.
- unsigned FrameSize = DL.getTypeAllocSize(Arg.getParamByValType());
- Flags.setByValSize(FrameSize);
+ Type *ArgMemTy = nullptr;
+ if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated() ||
+ Flags.isByRef()) {
+ if (!ArgMemTy)
+ ArgMemTy = Arg.getPointeeInMemoryValueType();
- unsigned FrameAlign;
- if (Arg.getParamAlignment())
- FrameAlign = Arg.getParamAlignment();
- else
- FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL);
- Flags.setByValAlign(Align(FrameAlign));
+ uint64_t MemSize = DL.getTypeAllocSize(ArgMemTy);
+
+ // For in-memory arguments, size and alignment should be passed from FE.
+ // BE will guess if this info is not there but there are cases it cannot
+ // get right.
+ MaybeAlign MemAlign = Arg.getParamAlign();
+ if (!MemAlign)
+ MemAlign = Align(TLI->getByValTypeAlignment(ArgMemTy, DL));
+
+ if (Flags.isByRef()) {
+ Flags.setByRefSize(MemSize);
+ Flags.setByRefAlign(*MemAlign);
+ } else {
+ Flags.setByValSize(MemSize);
+ Flags.setByValAlign(*MemAlign);
+ }
}
+
if (Arg.hasAttribute(Attribute::Nest))
Flags.setNest();
if (NeedsRegBlock)
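A condensed form of the in-memory argument logic above, assuming the same LLVM APIs: one size/alignment computation now serves byval, inalloca, preallocated, and byref arguments, differing only in which flag fields receive the result.

// Sketch: shared size/alignment for all pointee-in-memory argument kinds.
Type *MemTy = Arg.getPointeeInMemoryValueType();
uint64_t MemSize = DL.getTypeAllocSize(MemTy);
Align MemAlign = Arg.getParamAlign().getValueOr(
    Align(TLI->getByValTypeAlignment(MemTy, DL)));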
@@ -10641,8 +10793,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
{PeeledSwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
while (!WorkList.empty()) {
- SwitchWorkListItem W = WorkList.back();
- WorkList.pop_back();
+ SwitchWorkListItem W = WorkList.pop_back_val();
unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None &&
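Several mechanical cleanups in this patch swap iterator-pair idioms for llvm/ADT/STLExtras.h and SmallVector helpers; the replacements are behavior-preserving. A sketch of the equivalences (Vec, Dst, Src, X are placeholders):

// pop_back_val() combines back() + pop_back() into one call.
auto W = Vec.pop_back_val();
// append_range replaces Dst.insert(Dst.end(), Src.begin(), Src.end()).
llvm::append_range(Dst, Src);
// is_contained replaces std::count(...) used as a membership test.
bool Found = llvm::is_contained(Dst, X);
// erase_value replaces the erase(remove(...), end()) idiom.
llvm::erase_value(Dst, X);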
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index f0b7fb0d5229..8f6e98c40161 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -18,7 +18,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SwitchLoweringUtils.h"
@@ -26,7 +25,6 @@
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Statepoint.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
@@ -39,6 +37,7 @@
namespace llvm {
+class AAResults;
class AllocaInst;
class AtomicCmpXchgInst;
class AtomicRMWInst;
@@ -63,6 +62,7 @@ class FunctionLoweringInfo;
class GCFunctionInfo;
class GCRelocateInst;
class GCResultInst;
+class GCStatepointInst;
class IndirectBrInst;
class InvokeInst;
class LandingPadInst;
@@ -388,7 +388,7 @@ public:
SelectionDAG &DAG;
const DataLayout *DL = nullptr;
- AliasAnalysis *AA = nullptr;
+ AAResults *AA = nullptr;
const TargetLibraryInfo *LibInfo;
class SDAGSwitchLowering : public SwitchCG::SwitchLowering {
@@ -442,7 +442,7 @@ public:
SL(std::make_unique<SDAGSwitchLowering>(this, funcinfo)), FuncInfo(funcinfo),
SwiftError(swifterror) {}
- void init(GCFunctionInfo *gfi, AliasAnalysis *AA,
+ void init(GCFunctionInfo *gfi, AAResults *AA,
const TargetLibraryInfo *li);
/// Clear out the current SelectionDAG and the associated state and prepare
@@ -518,13 +518,6 @@ public:
SDValue getValue(const Value *V);
- /// Return the SDNode for the specified IR value if it exists.
- SDNode *getNodeForIRValue(const Value *V) {
- if (NodeMap.find(V) == NodeMap.end())
- return nullptr;
- return NodeMap[V].getNode();
- }
-
SDValue getNonRegisterValue(const Value *V);
SDValue getValueImpl(const Value *V);
@@ -692,7 +685,7 @@ private:
void visitAdd(const User &I) { visitBinary(I, ISD::ADD); }
void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); }
void visitSub(const User &I) { visitBinary(I, ISD::SUB); }
- void visitFSub(const User &I);
+ void visitFSub(const User &I) { visitBinary(I, ISD::FSUB); }
void visitMul(const User &I) { visitBinary(I, ISD::MUL); }
void visitFMul(const User &I) { visitBinary(I, ISD::FMUL); }
void visitURem(const User &I) { visitBinary(I, ISD::UREM); }
@@ -747,7 +740,7 @@ private:
void visitFence(const FenceInst &I);
void visitPHI(const PHINode &I);
void visitCall(const CallInst &I);
- bool visitMemCmpCall(const CallInst &I);
+ bool visitMemCmpBCmpCall(const CallInst &I);
bool visitMemPCpyCall(const CallInst &I);
bool visitMemChrCall(const CallInst &I);
bool visitStrCpyCall(const CallInst &I, bool isStpcpy);
@@ -766,6 +759,7 @@ private:
void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
+ void visitVectorPredicationIntrinsic(const VPIntrinsic &VPIntrin);
void visitVAStart(const CallInst &I);
void visitVAArg(const VAArgInst &I);
@@ -902,7 +896,7 @@ struct RegsForValue {
}
/// Return a list of registers and their sizes.
- SmallVector<std::pair<unsigned, unsigned>, 4> getRegsAndSizes() const;
+ SmallVector<std::pair<unsigned, TypeSize>, 4> getRegsAndSizes() const;
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 42e3016e65b8..d867f3e09e9c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -293,6 +293,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::ADDC: return "addc";
case ISD::ADDE: return "adde";
case ISD::ADDCARRY: return "addcarry";
+ case ISD::SADDO_CARRY: return "saddo_carry";
case ISD::SADDO: return "saddo";
case ISD::UADDO: return "uaddo";
case ISD::SSUBO: return "ssubo";
@@ -302,6 +303,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::SUBC: return "subc";
case ISD::SUBE: return "sube";
case ISD::SUBCARRY: return "subcarry";
+ case ISD::SSUBO_CARRY: return "ssubo_carry";
case ISD::SHL_PARTS: return "shl_parts";
case ISD::SRA_PARTS: return "sra_parts";
case ISD::SRL_PARTS: return "srl_parts";
@@ -310,6 +312,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::UADDSAT: return "uaddsat";
case ISD::SSUBSAT: return "ssubsat";
case ISD::USUBSAT: return "usubsat";
+ case ISD::SSHLSAT: return "sshlsat";
+ case ISD::USHLSAT: return "ushlsat";
case ISD::SMULFIX: return "smulfix";
case ISD::SMULFIXSAT: return "smulfixsat";
@@ -344,6 +348,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::STRICT_FP_TO_SINT: return "strict_fp_to_sint";
case ISD::FP_TO_UINT: return "fp_to_uint";
case ISD::STRICT_FP_TO_UINT: return "strict_fp_to_uint";
+ case ISD::FP_TO_SINT_SAT: return "fp_to_sint_sat";
+ case ISD::FP_TO_UINT_SAT: return "fp_to_uint_sat";
case ISD::BITCAST: return "bitcast";
case ISD::ADDRSPACECAST: return "addrspacecast";
case ISD::FP16_TO_FP: return "fp16_to_fp";
@@ -390,8 +396,11 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::STACKRESTORE: return "stackrestore";
case ISD::TRAP: return "trap";
case ISD::DEBUGTRAP: return "debugtrap";
+ case ISD::UBSANTRAP: return "ubsantrap";
case ISD::LIFETIME_START: return "lifetime.start";
case ISD::LIFETIME_END: return "lifetime.end";
+ case ISD::PSEUDO_PROBE:
+ return "pseudoprobe";
case ISD::GC_TRANSITION_START: return "gc_transition.start";
case ISD::GC_TRANSITION_END: return "gc_transition.end";
case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset";
@@ -410,6 +419,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef";
case ISD::CTLZ: return "ctlz";
case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef";
+ case ISD::PARITY: return "parity";
// Trampolines
case ISD::INIT_TRAMPOLINE: return "init_trampoline";
@@ -447,9 +457,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::SETFALSE2: return "setfalse2";
}
case ISD::VECREDUCE_FADD: return "vecreduce_fadd";
- case ISD::VECREDUCE_STRICT_FADD: return "vecreduce_strict_fadd";
+ case ISD::VECREDUCE_SEQ_FADD: return "vecreduce_seq_fadd";
case ISD::VECREDUCE_FMUL: return "vecreduce_fmul";
- case ISD::VECREDUCE_STRICT_FMUL: return "vecreduce_strict_fmul";
+ case ISD::VECREDUCE_SEQ_FMUL: return "vecreduce_seq_fmul";
case ISD::VECREDUCE_ADD: return "vecreduce_add";
case ISD::VECREDUCE_MUL: return "vecreduce_mul";
case ISD::VECREDUCE_AND: return "vecreduce_and";
@@ -461,6 +471,12 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::VECREDUCE_UMIN: return "vecreduce_umin";
case ISD::VECREDUCE_FMAX: return "vecreduce_fmax";
case ISD::VECREDUCE_FMIN: return "vecreduce_fmin";
+
+ // Vector Predication
+#define BEGIN_REGISTER_VP_SDNODE(SDID, LEGALARG, NAME, ...) \
+ case ISD::SDID: \
+ return #NAME;
+#include "llvm/IR/VPIntrinsics.def"
}
}
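The .def include above is LLVM's usual X-macro pattern: VPIntrinsics.def holds one BEGIN_REGISTER_VP_SDNODE invocation per vector-predication node, and each consumer defines its own expansion before including the file. A standalone illustration with a made-up .def file:

// nodes.def (hypothetical) contains lines like: HANDLE_NODE(VP_ADD, "vp_add")
const char *nodeName(unsigned Id) {
  switch (Id) {
#define HANDLE_NODE(ID, NAME) case ID: return NAME;
#include "nodes.def"
  default: return "unknown";
  }
}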
@@ -730,7 +746,38 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << ", compressing";
OS << ">";
- } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
+ } else if (const auto *MGather = dyn_cast<MaskedGatherSDNode>(this)) {
+ OS << "<";
+ printMemOperand(OS, *MGather->getMemOperand(), G);
+
+ bool doExt = true;
+ switch (MGather->getExtensionType()) {
+ default: doExt = false; break;
+ case ISD::EXTLOAD: OS << ", anyext"; break;
+ case ISD::SEXTLOAD: OS << ", sext"; break;
+ case ISD::ZEXTLOAD: OS << ", zext"; break;
+ }
+ if (doExt)
+ OS << " from " << MGather->getMemoryVT().getEVTString();
+
+ auto Signed = MGather->isIndexSigned() ? "signed" : "unsigned";
+ auto Scaled = MGather->isIndexScaled() ? "scaled" : "unscaled";
+ OS << ", " << Signed << " " << Scaled << " offset";
+
+ OS << ">";
+ } else if (const auto *MScatter = dyn_cast<MaskedScatterSDNode>(this)) {
+ OS << "<";
+ printMemOperand(OS, *MScatter->getMemOperand(), G);
+
+ if (MScatter->isTruncatingStore())
+ OS << ", trunc to " << MScatter->getMemoryVT().getEVTString();
+
+ auto Signed = MScatter->isIndexSigned() ? "signed" : "unsigned";
+ auto Scaled = MScatter->isIndexScaled() ? "scaled" : "unscaled";
+ OS << ", " << Signed << " " << Scaled << " offset";
+
+ OS << ">";
+ } else if (const MemSDNode *M = dyn_cast<MemSDNode>(this)) {
OS << "<";
printMemOperand(OS, *M->getMemOperand(), G);
OS << ">";
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 1f0432196a2d..7bae5048fc0e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -75,6 +75,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
@@ -778,6 +779,11 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
<< "'\n";
CurDAG->dump());
+#ifndef NDEBUG
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+#endif
+
if (ViewDAGCombine1 && MatchFilterBB)
CurDAG->viewGraph("dag-combine1 input for " + BlockName);
@@ -788,16 +794,16 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel);
}
-#ifndef NDEBUG
- if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
-#endif
-
LLVM_DEBUG(dbgs() << "Optimized lowered selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
CurDAG->dump());
+#ifndef NDEBUG
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+#endif
+
// Second step, hack on the DAG until it only uses operations and types that
// the target supports.
if (ViewLegalizeTypesDAGs && MatchFilterBB)
@@ -810,16 +816,16 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
Changed = CurDAG->LegalizeTypes();
}
-#ifndef NDEBUG
- if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
-#endif
-
LLVM_DEBUG(dbgs() << "Type-legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
CurDAG->dump());
+#ifndef NDEBUG
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+#endif
+
// Only allow creation of legal node types.
CurDAG->NewNodesMustHaveLegalTypes = true;
@@ -834,15 +840,15 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel);
}
-#ifndef NDEBUG
- if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
-#endif
-
LLVM_DEBUG(dbgs() << "Optimized type-legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
CurDAG->dump());
+
+#ifndef NDEBUG
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+#endif
}
{
@@ -857,6 +863,11 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
<< "'\n";
CurDAG->dump());
+#ifndef NDEBUG
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+#endif
+
{
NamedRegionTimer T("legalize_types2", "Type Legalization 2", GroupName,
GroupDescription, TimePassesIsEnabled);
@@ -868,6 +879,11 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
<< "'\n";
CurDAG->dump());
+#ifndef NDEBUG
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+#endif
+
if (ViewDAGCombineLT && MatchFilterBB)
CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
@@ -898,16 +914,16 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Legalize();
}
-#ifndef NDEBUG
- if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
-#endif
-
LLVM_DEBUG(dbgs() << "Legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
CurDAG->dump());
+#ifndef NDEBUG
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+#endif
+
if (ViewDAGCombine2 && MatchFilterBB)
CurDAG->viewGraph("dag-combine2 input for " + BlockName);
@@ -918,16 +934,16 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel);
}
-#ifndef NDEBUG
- if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
-#endif
-
LLVM_DEBUG(dbgs() << "Optimized legalized selection DAG: "
<< printMBBReference(*FuncInfo->MBB) << " '" << BlockName
<< "'\n";
CurDAG->dump());
+#ifndef NDEBUG
+ if (TTI.hasBranchDivergence())
+ CurDAG->VerifyDAGDiverence();
+#endif
+
if (OptLevel != CodeGenOpt::None)
ComputeLiveOutVRegInfo();
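The repeated moves in this file all apply one change: VerifyDAGDiverence now runs after the LLVM_DEBUG dump rather than before it, so when verification asserts, the offending DAG has already been printed. The resulting pattern at every stage is:

LLVM_DEBUG(dbgs() << "<stage> selection DAG: ...\n"; CurDAG->dump());
#ifndef NDEBUG
  if (TTI.hasBranchDivergence())
    CurDAG->VerifyDAGDiverence();
#endif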
@@ -1251,6 +1267,12 @@ bool SelectionDAGISel::PrepareEHLandingPad() {
BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
.addSym(Label);
+ // If the unwinder does not preserve all registers, ensure that the
+ // function marks the clobbered registers as used.
+ const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
+ if (auto *RegMask = TRI.getCustomEHPadPreservedMask(*MF))
+ MF->getRegInfo().addPhysRegsUsedFromRegMask(RegMask);
+
if (Pers == EHPersonality::Wasm_CXX) {
if (const auto *CPI = dyn_cast<CatchPadInst>(LLVMBB->getFirstNonPHI()))
mapWasmLandingPadIndex(MBB, CPI);
@@ -2072,7 +2094,7 @@ void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops,
InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size());
NewFlags = InlineAsm::getFlagWordForMem(NewFlags, ConstraintID);
Ops.push_back(CurDAG->getTargetConstant(NewFlags, DL, MVT::i32));
- Ops.insert(Ops.end(), SelOps.begin(), SelOps.end());
+ llvm::append_range(Ops, SelOps);
i += 2;
}
}
@@ -2272,7 +2294,7 @@ void SelectionDAGISel::Select_FREEZE(SDNode *N) {
}
/// GetVBR - decode a vbr encoding whose top bit is set.
-LLVM_ATTRIBUTE_ALWAYS_INLINE static inline uint64_t
+LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t
GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
assert(Val >= 128 && "Not a VBR");
Val &= 127; // Remove first vbr bit.
@@ -2331,7 +2353,7 @@ void SelectionDAGISel::UpdateChains(
// If the node became dead and we haven't already seen it, delete it.
if (ChainNode != NodeToMatch && ChainNode->use_empty() &&
- !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), ChainNode))
+ !llvm::is_contained(NowDeadNodes, ChainNode))
NowDeadNodes.push_back(ChainNode);
}
}
@@ -2469,10 +2491,9 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
}
/// CheckSame - Implements OP_CheckSame.
-LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
-CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
- SDValue N,
- const SmallVectorImpl<std::pair<SDValue, SDNode*>> &RecordedNodes) {
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N,
+ const SmallVectorImpl<std::pair<SDValue, SDNode *>> &RecordedNodes) {
// Accept if it is exactly the same as a previously recorded node.
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
@@ -2480,11 +2501,10 @@ CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
}
/// CheckChildSame - Implements OP_CheckChildXSame.
-LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
-CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
- SDValue N,
- const SmallVectorImpl<std::pair<SDValue, SDNode*>> &RecordedNodes,
- unsigned ChildNo) {
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckChildSame(
+ const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N,
+ const SmallVectorImpl<std::pair<SDValue, SDNode *>> &RecordedNodes,
+ unsigned ChildNo) {
if (ChildNo >= N.getNumOperands())
return false; // Match fails if out of range child #.
return ::CheckSame(MatcherTable, MatcherIndex, N.getOperand(ChildNo),
@@ -2492,20 +2512,20 @@ CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
}
/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
-LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
const SelectionDAGISel &SDISel) {
return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]);
}
/// CheckNodePredicate - Implements OP_CheckNodePredicate.
-LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
const SelectionDAGISel &SDISel, SDNode *N) {
return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]);
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDNode *N) {
uint16_t Opc = MatcherTable[MatcherIndex++];
@@ -2513,7 +2533,7 @@ CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return N->getOpcode() == Opc;
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N,
const TargetLowering *TLI, const DataLayout &DL) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
@@ -2523,7 +2543,7 @@ CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N,
return VT == MVT::iPTR && N.getValueType() == TLI->getPointerTy(DL);
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const TargetLowering *TLI, const DataLayout &DL,
unsigned ChildNo) {
@@ -2533,14 +2553,14 @@ CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
DL);
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N) {
return cast<CondCodeSDNode>(N)->get() ==
(ISD::CondCode)MatcherTable[MatcherIndex++];
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckChild2CondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N) {
if (2 >= N.getNumOperands())
@@ -2548,7 +2568,7 @@ CheckChild2CondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return ::CheckCondCode(MatcherTable, MatcherIndex, N.getOperand(2));
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const TargetLowering *TLI, const DataLayout &DL) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
@@ -2559,7 +2579,7 @@ CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI->getPointerTy(DL);
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N) {
int64_t Val = MatcherTable[MatcherIndex++];
@@ -2570,7 +2590,7 @@ CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return C && C->getSExtValue() == Val;
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, unsigned ChildNo) {
if (ChildNo >= N.getNumOperands())
@@ -2578,7 +2598,7 @@ CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return ::CheckInteger(MatcherTable, MatcherIndex, N.getOperand(ChildNo));
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const SelectionDAGISel &SDISel) {
int64_t Val = MatcherTable[MatcherIndex++];
@@ -2591,9 +2611,9 @@ CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return C && SDISel.CheckAndMask(N.getOperand(0), C, Val);
}
-LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
-CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
- SDValue N, const SelectionDAGISel &SDISel) {
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N,
+ const SelectionDAGISel &SDISel) {
int64_t Val = MatcherTable[MatcherIndex++];
if (Val & 128)
Val = GetVBR(Val, MatcherTable, MatcherIndex);
@@ -2786,6 +2806,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
case ISD::ANNOTATION_LABEL:
case ISD::LIFETIME_START:
case ISD::LIFETIME_END:
+ case ISD::PSEUDO_PROBE:
NodeToMatch->setNodeId(-1); // Mark selected.
return;
case ISD::AssertSext:
@@ -3181,10 +3202,12 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break;
continue;
case OPC_CheckImmAllOnesV:
- if (!ISD::isBuildVectorAllOnes(N.getNode())) break;
+ if (!ISD::isConstantSplatVectorAllOnes(N.getNode()))
+ break;
continue;
case OPC_CheckImmAllZerosV:
- if (!ISD::isBuildVectorAllZeros(N.getNode())) break;
+ if (!ISD::isConstantSplatVectorAllZeros(N.getNode()))
+ break;
continue;
case OPC_CheckFoldableChainNode: {
@@ -3489,7 +3512,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
auto &Chain = ChainNodesMatched;
assert((!E || !is_contained(Chain, N)) &&
"Chain node replaced during MorphNode");
- Chain.erase(std::remove(Chain.begin(), Chain.end(), N), Chain.end());
+ llvm::erase_value(Chain, N);
});
Res = cast<MachineSDNode>(MorphNode(NodeToMatch, TargetOpc, VTList,
Ops, EmitNodeInfo));
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 2cb57c1d1ccc..0172646c22ec 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -14,12 +14,10 @@
#include "StatepointLowering.h"
#include "SelectionDAGBuilder.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
@@ -30,7 +28,6 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
@@ -67,6 +64,20 @@ cl::opt<bool> UseRegistersForDeoptValues(
"use-registers-for-deopt-values", cl::Hidden, cl::init(false),
cl::desc("Allow using registers for non pointer deopt args"));
+cl::opt<bool> UseRegistersForGCPointersInLandingPad(
+ "use-registers-for-gc-values-in-landing-pad", cl::Hidden, cl::init(false),
+ cl::desc("Allow using registers for gc pointer in landing pad"));
+
+cl::opt<unsigned> MaxRegistersForGCPointers(
+ "max-registers-for-gc-values", cl::Hidden, cl::init(0),
+ cl::desc("Max number of VRegs allowed to pass GC pointer meta args in"));
+
+cl::opt<bool> AlwaysSpillBase("statepoint-always-spill-base", cl::Hidden,
+ cl::init(true),
+ cl::desc("Force spilling of base GC pointers"));
+
+typedef FunctionLoweringInfo::StatepointRelocationRecord RecordType;
+
static void pushStackMapConstant(SmallVectorImpl<SDValue>& Ops,
SelectionDAGBuilder &Builder, uint64_t Value) {
SDLoc L = Builder.getCurSDLoc();
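The three new knobs are ordinary cl::opt globals, so they can be set on the tool command line (for example -max-registers-for-gc-values=8) and read in code, as done further down for MaxRegistersForGCPointers. A minimal usage sketch:

// Reading the new knobs (names as declared above):
unsigned MaxVRegPtrs = MaxRegistersForGCPointers.getValue();
bool SpillBase = AlwaysSpillBase; // cl::opt<bool> converts implicitly
if (!UseRegistersForGCPointersInLandingPad) {
  // keep landing-pad pointers off vregs
}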
@@ -156,14 +167,18 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
// Spill location is known for gc relocates
if (const auto *Relocate = dyn_cast<GCRelocateInst>(Val)) {
- const auto &SpillMap =
- Builder.FuncInfo.StatepointSpillMaps[Relocate->getStatepoint()];
+ const auto &RelocationMap =
+ Builder.FuncInfo.StatepointRelocationMaps[Relocate->getStatepoint()];
- auto It = SpillMap.find(Relocate->getDerivedPtr());
- if (It == SpillMap.end())
+ auto It = RelocationMap.find(Relocate->getDerivedPtr());
+ if (It == RelocationMap.end())
return None;
- return It->second;
+ auto &Record = It->second;
+ if (Record.type != RecordType::Spill)
+ return None;
+
+ return Record.payload.FI;
}
// Look through bitcast instructions.
@@ -221,7 +236,6 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
return None;
}
-
/// Return true if-and-only-if the given SDValue can be lowered as either a
/// constant argument or a stack reference. The key point is that the value
/// doesn't need to be spilled or tracked as a vreg use.
@@ -242,7 +256,6 @@ static bool willLowerDirectly(SDValue Incoming) {
Incoming.isUndef());
}
-
/// Try to find existing copies of the incoming values in stack slots used for
/// statepoint spilling. If we can find a spill slot for the incoming value,
/// mark that slot as allocated, and reuse the same slot for this safepoint.
@@ -388,7 +401,7 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
StoreMMO);
MMO = getMachineMemOperand(MF, *cast<FrameIndexSDNode>(Loc));
-
+
Builder.StatepointLowering.setLocation(Incoming, Loc);
}
@@ -485,7 +498,10 @@ lowerIncomingStatepointValue(SDValue Incoming, bool RequireSpillSlot,
/// will be set to the last value spilled (if any were).
static void
lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
- SmallVectorImpl<MachineMemOperand*> &MemRefs, SelectionDAGBuilder::StatepointLoweringInfo &SI,
+ SmallVectorImpl<MachineMemOperand *> &MemRefs,
+ SmallVectorImpl<SDValue> &GCPtrs,
+ DenseMap<SDValue, int> &LowerAsVReg,
+ SelectionDAGBuilder::StatepointLoweringInfo &SI,
SelectionDAGBuilder &Builder) {
// Lower the deopt and gc arguments for this statepoint. Layout will be:
// deopt argument length, deopt arguments.., gc arguments...
@@ -531,6 +547,66 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
const bool LiveInDeopt =
SI.StatepointFlags & (uint64_t)StatepointFlags::DeoptLiveIn;
+ // Decide which derived pointers will go on VRegs.
+ unsigned MaxVRegPtrs = MaxRegistersForGCPointers.getValue();
+
+ // Pointers used on the exceptional path of an invoke statepoint.
+ // We cannot assign them to VRegs.
+ SmallSet<SDValue, 8> LPadPointers;
+ if (!UseRegistersForGCPointersInLandingPad)
+ if (auto *StInvoke = dyn_cast_or_null<InvokeInst>(SI.StatepointInstr)) {
+ LandingPadInst *LPI = StInvoke->getLandingPadInst();
+ for (auto *Relocate : SI.GCRelocates)
+ if (Relocate->getOperand(0) == LPI) {
+ LPadPointers.insert(Builder.getValue(Relocate->getBasePtr()));
+ LPadPointers.insert(Builder.getValue(Relocate->getDerivedPtr()));
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Deciding how to lower GC Pointers:\n");
+
+ // List of unique lowered GC Pointer values.
+ SmallSetVector<SDValue, 16> LoweredGCPtrs;
+ // Map each lowered GC pointer value to its index in the vector above.
+ DenseMap<SDValue, unsigned> GCPtrIndexMap;
+
+ unsigned CurNumVRegs = 0;
+
+ auto canPassGCPtrOnVReg = [&](SDValue SD) {
+ if (SD.getValueType().isVector())
+ return false;
+ if (LPadPointers.count(SD))
+ return false;
+ return !willLowerDirectly(SD);
+ };
+
+ auto processGCPtr = [&](const Value *V) {
+ SDValue PtrSD = Builder.getValue(V);
+ if (!LoweredGCPtrs.insert(PtrSD))
+ return; // skip duplicates
+ GCPtrIndexMap[PtrSD] = LoweredGCPtrs.size() - 1;
+
+ assert(!LowerAsVReg.count(PtrSD) && "must not have been seen");
+ if (LowerAsVReg.size() == MaxVRegPtrs)
+ return;
+ assert(V->getType()->isVectorTy() == PtrSD.getValueType().isVector() &&
+ "IR and SD types disagree");
+ if (!canPassGCPtrOnVReg(PtrSD)) {
+ LLVM_DEBUG(dbgs() << "direct/spill "; PtrSD.dump(&Builder.DAG));
+ return;
+ }
+ LLVM_DEBUG(dbgs() << "vreg "; PtrSD.dump(&Builder.DAG));
+ LowerAsVReg[PtrSD] = CurNumVRegs++;
+ };
+
+ // Process derived pointers first to give them a better chance of going on VRegs.
+ for (const Value *V : SI.Ptrs)
+ processGCPtr(V);
+ for (const Value *V : SI.Bases)
+ processGCPtr(V);
+
+ LLVM_DEBUG(dbgs() << LowerAsVReg.size() << " pointers will go in vregs\n");
+
auto isGCValue = [&](const Value *V) {
auto *Ty = V->getType();
if (!Ty->isPtrOrPtrVectorTy())
@@ -542,7 +618,9 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
};
auto requireSpillSlot = [&](const Value *V) {
- return !(LiveInDeopt || UseRegistersForDeoptValues) || isGCValue(V);
+ if (isGCValue(V))
+ return !LowerAsVReg.count(Builder.getValue(V));
+ return !(LiveInDeopt || UseRegistersForDeoptValues);
};
// Before we actually start lowering (and allocating spill slots for values),
@@ -554,9 +632,17 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
if (requireSpillSlot(V))
reservePreviousStackSlotForValue(V, Builder);
}
- for (unsigned i = 0; i < SI.Bases.size(); ++i) {
- reservePreviousStackSlotForValue(SI.Bases[i], Builder);
- reservePreviousStackSlotForValue(SI.Ptrs[i], Builder);
+
+ for (const Value *V : SI.Ptrs) {
+ SDValue SDV = Builder.getValue(V);
+ if (!LowerAsVReg.count(SDV))
+ reservePreviousStackSlotForValue(V, Builder);
+ }
+
+ for (const Value *V : SI.Bases) {
+ SDValue SDV = Builder.getValue(V);
+ if (!LowerAsVReg.count(SDV))
+ reservePreviousStackSlotForValue(V, Builder);
}
// First, prefix the list with the number of unique values to be
@@ -567,6 +653,7 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// The vm state arguments are lowered in an opaque manner. We do not know
// what type of values are contained within.
+ LLVM_DEBUG(dbgs() << "Lowering deopt state\n");
for (const Value *V : SI.DeoptState) {
SDValue Incoming;
// If this is a function argument at a static frame index, generate it as
@@ -578,78 +665,56 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
}
if (!Incoming.getNode())
Incoming = Builder.getValue(V);
+ LLVM_DEBUG(dbgs() << "Value " << *V
+ << " requireSpillSlot = " << requireSpillSlot(V) << "\n");
lowerIncomingStatepointValue(Incoming, requireSpillSlot(V), Ops, MemRefs,
Builder);
}
- // Finally, go ahead and lower all the gc arguments. There's no prefixed
- // length for this one. After lowering, we'll have the base and pointer
- // arrays interwoven with each (lowered) base pointer immediately followed by
- // it's (lowered) derived pointer. i.e
- // (base[0], ptr[0], base[1], ptr[1], ...)
- for (unsigned i = 0; i < SI.Bases.size(); ++i) {
- const Value *Base = SI.Bases[i];
- lowerIncomingStatepointValue(Builder.getValue(Base),
- /*RequireSpillSlot*/ true, Ops, MemRefs,
+ // Finally, go ahead and lower all the gc arguments.
+ pushStackMapConstant(Ops, Builder, LoweredGCPtrs.size());
+ for (SDValue SDV : LoweredGCPtrs)
+ lowerIncomingStatepointValue(SDV, !LowerAsVReg.count(SDV), Ops, MemRefs,
Builder);
- const Value *Ptr = SI.Ptrs[i];
- lowerIncomingStatepointValue(Builder.getValue(Ptr),
- /*RequireSpillSlot*/ true, Ops, MemRefs,
- Builder);
- }
+ // Copy into the out vector; LoweredGCPtrs will be empty after this point.
+ GCPtrs = LoweredGCPtrs.takeVector();
// If there are any explicit spill slots passed to the statepoint, record
// them, but otherwise do not do anything special. These are user provided
// allocas and give control over placement to the consumer. In this case,
// it is the contents of the slot which may get updated, not the pointer to
// the alloca
+ SmallVector<SDValue, 4> Allocas;
for (Value *V : SI.GCArgs) {
SDValue Incoming = Builder.getValue(V);
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) {
// This handles allocas as arguments to the statepoint
assert(Incoming.getValueType() == Builder.getFrameIndexTy() &&
"Incoming value is a frame index!");
- Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(),
- Builder.getFrameIndexTy()));
+ Allocas.push_back(Builder.DAG.getTargetFrameIndex(
+ FI->getIndex(), Builder.getFrameIndexTy()));
auto &MF = Builder.DAG.getMachineFunction();
auto *MMO = getMachineMemOperand(MF, *FI);
MemRefs.push_back(MMO);
}
}
+ pushStackMapConstant(Ops, Builder, Allocas.size());
+ Ops.append(Allocas.begin(), Allocas.end());
- // Record computed locations for all lowered values.
- // This can not be embedded in lowering loops as we need to record *all*
- // values, while previous loops account only values with unique SDValues.
- const Instruction *StatepointInstr = SI.StatepointInstr;
- auto &SpillMap = Builder.FuncInfo.StatepointSpillMaps[StatepointInstr];
-
- for (const GCRelocateInst *Relocate : SI.GCRelocates) {
- const Value *V = Relocate->getDerivedPtr();
- SDValue SDV = Builder.getValue(V);
- SDValue Loc = Builder.StatepointLowering.getLocation(SDV);
-
- if (Loc.getNode()) {
- SpillMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex();
- } else {
- // Record value as visited, but not spilled. This is case for allocas
- // and constants. For this values we can avoid emitting spill load while
- // visiting corresponding gc_relocate.
- // Actually we do not need to record them in this map at all.
- // We do this only to check that we are not relocating any unvisited
- // value.
- SpillMap[V] = None;
-
- // Default llvm mechanisms for exporting values which are used in
- // different basic blocks does not work for gc relocates.
- // Note that it would be incorrect to teach llvm that all relocates are
- // uses of the corresponding values so that it would automatically
- // export them. Relocates of the spilled values does not use original
- // value.
- if (Relocate->getParent() != StatepointInstr->getParent())
- Builder.ExportFromCurrentBlock(V);
- }
+ // Now construct the GC base/derived map.
+ pushStackMapConstant(Ops, Builder, SI.Ptrs.size());
+ SDLoc L = Builder.getCurSDLoc();
+ for (unsigned i = 0; i < SI.Ptrs.size(); ++i) {
+ SDValue Base = Builder.getValue(SI.Bases[i]);
+ assert(GCPtrIndexMap.count(Base) && "base not found in index map");
+ Ops.push_back(
+ Builder.DAG.getTargetConstant(GCPtrIndexMap[Base], L, MVT::i64));
+ SDValue Derived = Builder.getValue(SI.Ptrs[i]);
+ assert(GCPtrIndexMap.count(Derived) && "derived not found in index map");
+ Ops.push_back(
+ Builder.DAG.getTargetConstant(GCPtrIndexMap[Derived], L, MVT::i64));
}
}
@@ -665,6 +730,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
assert(SI.Bases.size() == SI.Ptrs.size() &&
SI.Ptrs.size() <= SI.GCRelocates.size());
+ LLVM_DEBUG(dbgs() << "Lowering statepoint " << *SI.StatepointInstr << "\n");
#ifndef NDEBUG
for (auto *Reloc : SI.GCRelocates)
if (Reloc->getParent() == SI.StatepointInstr->getParent())
@@ -672,9 +738,16 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
#endif
// Lower statepoint vmstate and gcstate arguments
+
+ // All lowered meta args.
SmallVector<SDValue, 10> LoweredMetaArgs;
+ // Lowered GC pointers (subset of above).
+ SmallVector<SDValue, 16> LoweredGCArgs;
SmallVector<MachineMemOperand*, 16> MemRefs;
- lowerStatepointMetaArgs(LoweredMetaArgs, MemRefs, SI, *this);
+ // Maps each derived pointer SDValue to the statepoint result index holding its relocation.
+ DenseMap<SDValue, int> LowerAsVReg;
+ lowerStatepointMetaArgs(LoweredMetaArgs, MemRefs, LoweredGCArgs, LowerAsVReg,
+ SI, *this);
// Now that we've emitted the spills, we need to update the root so that the
// call sequence is ordered correctly.
@@ -774,7 +847,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
pushStackMapConstant(Ops, *this, Flags);
// Insert all vmstate and gcstate arguments
- Ops.insert(Ops.end(), LoweredMetaArgs.begin(), LoweredMetaArgs.end());
+ llvm::append_range(Ops, LoweredMetaArgs);
// Add register mask from call node
Ops.push_back(*RegMaskIt);
@@ -788,12 +861,79 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
// Compute return values. Provide a glue output since we consume one as
// input. This allows someone else to chain off us as needed.
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<EVT, 8> NodeTys;
+ for (auto SD : LoweredGCArgs) {
+ if (!LowerAsVReg.count(SD))
+ continue;
+ NodeTys.push_back(SD.getValueType());
+ }
+ LLVM_DEBUG(dbgs() << "Statepoint has " << NodeTys.size() << " results\n");
+ assert(NodeTys.size() == LowerAsVReg.size() && "Inconsistent GC Ptr lowering");
+ NodeTys.push_back(MVT::Other);
+ NodeTys.push_back(MVT::Glue);
+ unsigned NumResults = NodeTys.size();
MachineSDNode *StatepointMCNode =
DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops);
DAG.setNodeMemRefs(StatepointMCNode, MemRefs);
+ // For values lowered to tied-defs, create the virtual registers. Note that
+ // for simplicity, we *always* create a vreg even within a single block.
+ DenseMap<SDValue, Register> VirtRegs;
+ for (const auto *Relocate : SI.GCRelocates) {
+ Value *Derived = Relocate->getDerivedPtr();
+ SDValue SD = getValue(Derived);
+ if (!LowerAsVReg.count(SD))
+ continue;
+
+ // Handle multiple gc.relocates of the same input efficiently.
+ if (VirtRegs.count(SD))
+ continue;
+
+ SDValue Relocated = SDValue(StatepointMCNode, LowerAsVReg[SD]);
+
+ auto *RetTy = Relocate->getType();
+ Register Reg = FuncInfo.CreateRegs(RetTy);
+ RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
+ DAG.getDataLayout(), Reg, RetTy, None);
+ SDValue Chain = DAG.getRoot();
+ RFV.getCopyToRegs(Relocated, DAG, getCurSDLoc(), Chain, nullptr);
+ PendingExports.push_back(Chain);
+
+ VirtRegs[SD] = Reg;
+ }
+
+ // Record for later use how each relocation was lowered. This is needed to
+ // allow later gc.relocates to mirror the lowering chosen.
+ const Instruction *StatepointInstr = SI.StatepointInstr;
+ auto &RelocationMap = FuncInfo.StatepointRelocationMaps[StatepointInstr];
+ for (const GCRelocateInst *Relocate : SI.GCRelocates) {
+ const Value *V = Relocate->getDerivedPtr();
+ SDValue SDV = getValue(V);
+ SDValue Loc = StatepointLowering.getLocation(SDV);
+
+ RecordType Record;
+ if (LowerAsVReg.count(SDV)) {
+ Record.type = RecordType::VReg;
+ assert(VirtRegs.count(SDV));
+ Record.payload.Reg = VirtRegs[SDV];
+ } else if (Loc.getNode()) {
+ Record.type = RecordType::Spill;
+ Record.payload.FI = cast<FrameIndexSDNode>(Loc)->getIndex();
+ } else {
+ Record.type = RecordType::NoRelocate;
+ // If we didn't relocate a value, we'll essentially end up inserting an
+ // additional use of the original value when lowering the gc.relocate.
+ // We need to make sure the value is available at the new use, which
+ // might be in another block.
+ if (Relocate->getParent() != StatepointInstr->getParent())
+ ExportFromCurrentBlock(V);
+ }
+ RelocationMap[V] = Record;
+ }
+
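With this change a STATEPOINT machine node no longer has the fixed (chain, glue) value list: relocated pointers lowered to vregs come first, with chain and glue pinned to the last two result slots. That is why the GC_TRANSITION and call-replacement code below indexes NumResults - 2 and NumResults - 1. A sketch of the layout:

// Result layout of the new STATEPOINT node (N = number of vreg relocations):
//   results [0 .. N-1] : relocated GC pointers assigned to vregs
//   result  [N]        : chain (MVT::Other)
//   result  [N+1]      : glue  (MVT::Glue)
SDValue Ch   = SDValue(StatepointMCNode, NumResults - 2);
SDValue Glue = SDValue(StatepointMCNode, NumResults - 1);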
SDNode *SinkNode = StatepointMCNode;
// Build the GC_TRANSITION_END node if necessary.
@@ -804,7 +944,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
SmallVector<SDValue, 8> TEOps;
// Add chain
- TEOps.push_back(SDValue(StatepointMCNode, 0));
+ TEOps.push_back(SDValue(StatepointMCNode, NumResults - 2));
// Add GC transition arguments
for (const Value *V : SI.GCTransitionArgs) {
@@ -814,7 +954,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
}
// Add glue
- TEOps.push_back(SDValue(StatepointMCNode, 1));
+ TEOps.push_back(SDValue(StatepointMCNode, NumResults - 1));
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
@@ -825,12 +965,18 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
}
// Replace original call
- DAG.ReplaceAllUsesWith(CallNode, SinkNode); // This may update Root
+ // Call: ch,glue = CALL ...
+ // Statepoint: [gc relocates],ch,glue = STATEPOINT ...
+ unsigned NumSinkValues = SinkNode->getNumValues();
+ SDValue StatepointValues[2] = {SDValue(SinkNode, NumSinkValues - 2),
+ SDValue(SinkNode, NumSinkValues - 1)};
+ DAG.ReplaceAllUsesWith(CallNode, StatepointValues);
// Remove original call node
DAG.DeleteNode(CallNode);
- // DON'T set the root - under the assumption that it's already set past the
- // inserted node we created.
+ // Since we always emit CopyToRegs (even for local relocates), we must
+ // update the root, so that they are emitted before any local uses.
+ (void)getControlRoot();
// TODO: A better future implementation would be to emit a single variable
// argument, variable return value STATEPOINT node here and then hookup the
@@ -927,7 +1073,7 @@ SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I,
setValue(&I, ReturnValue);
return;
}
-
+
// Result value will be used in a different basic block so we need to export
// it now. Default exporting mechanism will not work here because statepoint
// call has a different type than the actual call. It means that by default
@@ -1024,6 +1170,28 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
#endif
const Value *DerivedPtr = Relocate.getDerivedPtr();
+ auto &RelocationMap =
+ FuncInfo.StatepointRelocationMaps[Relocate.getStatepoint()];
+ auto SlotIt = RelocationMap.find(DerivedPtr);
+ assert(SlotIt != RelocationMap.end() && "Relocating not lowered gc value");
+ const RecordType &Record = SlotIt->second;
+
+ // If the relocation was done via a virtual register, emit a copy from it.
+ if (Record.type == RecordType::VReg) {
+ Register InReg = Record.payload.Reg;
+ RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
+ DAG.getDataLayout(), InReg, Relocate.getType(),
+ None); // This is not an ABI copy.
+ // We generate copies to/from regs even for local uses, hence we must
+ // chain with the current root to ensure proper ordering of copies
+ // w.r.t. the statepoint.
+ SDValue Chain = DAG.getRoot();
+ SDValue Relocation = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
+ Chain, nullptr, nullptr);
+ setValue(&Relocate, Relocation);
+ return;
+ }
+
SDValue SD = getValue(DerivedPtr);
if (SD.isUndef() && SD.getValueType().getSizeInBits() <= 64) {
@@ -1033,19 +1201,17 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
return;
}
- auto &SpillMap = FuncInfo.StatepointSpillMaps[Relocate.getStatepoint()];
- auto SlotIt = SpillMap.find(DerivedPtr);
- assert(SlotIt != SpillMap.end() && "Relocating not lowered gc value");
- Optional<int> DerivedPtrLocation = SlotIt->second;
// We didn't need to spill these special cases (constants and allocas).
// See the handling in spillIncomingValueForStatepoint for detail.
- if (!DerivedPtrLocation) {
+ if (Record.type == RecordType::NoRelocate) {
setValue(&Relocate, SD);
return;
}
- unsigned Index = *DerivedPtrLocation;
+ assert(Record.type == RecordType::Spill);
+
+ unsigned Index = Record.payload.FI;
SDValue SpillSlot = DAG.getTargetFrameIndex(Index, getFrameIndexTy());
// All the reloads are independent and are reading memory only modified by
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8b3e6189a07f..5760132e44a0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -93,7 +93,7 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
SDValue Value = OutVals[I];
if (Value->getOpcode() != ISD::CopyFromReg)
return false;
- MCRegister ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
+ Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
if (MRI.getLiveInPhysReg(ArgReg) != Reg)
return false;
}
@@ -250,7 +250,7 @@ bool TargetLowering::findOptimalMemOpLowering(
bool Fast;
if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
allowsMisalignedMemoryAccesses(
- VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0,
+ VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 1,
MachineMemOperand::MONone, &Fast) &&
Fast)
VTSize = Size;
@@ -912,8 +912,14 @@ bool TargetLowering::SimplifyDemandedBits(
if (Op.getOpcode() == ISD::Constant) {
// We know all of the bits for a constant!
- Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
- Known.Zero = ~Known.One;
+ Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue());
+ return false;
+ }
+
+ if (Op.getOpcode() == ISD::ConstantFP) {
+ // We know all of the bits for a floating point constant!
+ Known = KnownBits::makeConstant(
+ cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
return false;
}
@@ -1009,10 +1015,8 @@ bool TargetLowering::SimplifyDemandedBits(
Depth + 1))
return true;
- if (!!DemandedVecElts) {
- Known.One &= KnownVec.One;
- Known.Zero &= KnownVec.Zero;
- }
+ if (!!DemandedVecElts)
+ Known = KnownBits::commonBits(Known, KnownVec);
return false;
}
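KnownBits::commonBits, used as the replacement throughout this hunk, intersects the facts known on both sides; it is exactly the Zero/One conjunction the removed lines spelled out by hand. A sketch of its semantics:

// A bit is known zero (one) in the result only if it is known zero (one)
// in both inputs.
KnownBits commonBits(const KnownBits &A, const KnownBits &B) {
  KnownBits R(A.getBitWidth());
  R.Zero = A.Zero & B.Zero;
  R.One = A.One & B.One;
  return R;
}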
@@ -1037,14 +1041,10 @@ bool TargetLowering::SimplifyDemandedBits(
Known.Zero.setAllBits();
Known.One.setAllBits();
- if (!!DemandedSubElts) {
- Known.One &= KnownSub.One;
- Known.Zero &= KnownSub.Zero;
- }
- if (!!DemandedSrcElts) {
- Known.One &= KnownSrc.One;
- Known.Zero &= KnownSrc.Zero;
- }
+ if (!!DemandedSubElts)
+ Known = KnownBits::commonBits(Known, KnownSub);
+ if (!!DemandedSrcElts)
+ Known = KnownBits::commonBits(Known, KnownSrc);
// Attempt to avoid multi-use src if we don't need anything from it.
if (!DemandedBits.isAllOnesValue() || !DemandedSubElts.isAllOnesValue() ||
@@ -1101,10 +1101,8 @@ bool TargetLowering::SimplifyDemandedBits(
Known2, TLO, Depth + 1))
return true;
// Known bits are shared by every demanded subvector element.
- if (!!DemandedSubElts) {
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
- }
+ if (!!DemandedSubElts)
+ Known = KnownBits::commonBits(Known, Known2);
}
break;
}
@@ -1142,15 +1140,13 @@ bool TargetLowering::SimplifyDemandedBits(
if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
Depth + 1))
return true;
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
}
if (!!DemandedRHS) {
if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
Depth + 1))
return true;
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
}
// Attempt to avoid multi-use ops if we don't need anything from them.
@@ -1325,15 +1321,15 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
// If all of the unknown bits are known to be zero on one side or the other
- // (but not both) turn this into an *inclusive* or.
+ // turn this into an *inclusive* or.
// e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
ConstantSDNode* C = isConstOrConstSplat(Op1, DemandedElts);
if (C) {
- // If one side is a constant, and all of the known set bits on the other
- // side are also set in the constant, turn this into an AND, as we know
+ // If one side is a constant, and all of the set bits in the constant are
+ // also known set on the other side, turn this into an AND, as we know
// the bits will be cleared.
// e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
// NB: it is okay if more bits are known than are requested
@@ -1377,8 +1373,7 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
// Only known if known in both the LHS and RHS.
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
break;
case ISD::SELECT_CC:
if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
@@ -1395,8 +1390,7 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
// Only known if known in both the LHS and RHS.
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ Known = KnownBits::commonBits(Known, Known2);
break;
case ISD::SETCC: {
SDValue Op0 = Op.getOperand(0);
@@ -1728,6 +1722,32 @@ bool TargetLowering::SimplifyDemandedBits(
}
break;
}
+ case ISD::UMIN: {
+ // Check if one arg is always less than (or equal) to the other arg.
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
+ KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
+ Known = KnownBits::umin(Known0, Known1);
+ if (Optional<bool> IsULE = KnownBits::ule(Known0, Known1))
+ return TLO.CombineTo(Op, IsULE.getValue() ? Op0 : Op1);
+ if (Optional<bool> IsULT = KnownBits::ult(Known0, Known1))
+ return TLO.CombineTo(Op, IsULT.getValue() ? Op0 : Op1);
+ break;
+ }
+ case ISD::UMAX: {
+ // Check if one arg is always greater than (or equal) to the other arg.
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
+ KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
+ Known = KnownBits::umax(Known0, Known1);
+ if (Optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
+ return TLO.CombineTo(Op, IsUGE.getValue() ? Op0 : Op1);
+ if (Optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
+ return TLO.CombineTo(Op, IsUGT.getValue() ? Op0 : Op1);
+ break;
+ }
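A worked example of the new UMIN/UMAX folds, assuming the KnownBits APIs used above: when the known bits already order the operands, the min/max disappears entirely.

// i8 example: Op0 <= 7 (bits 3..7 known zero), Op1 in [8, 15].
KnownBits K0(8), K1(8);
K0.Zero.setBitsFrom(3);                   // Op0's max value is 7
K1.One.setBit(3);                         // Op1 has bit 3 set...
K1.Zero.setBitsFrom(4);                   // ...and nothing above it
Optional<bool> ULT = KnownBits::ult(K0, K1); // true: umin -> Op0, umax -> Op1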
case ISD::BITREVERSE: {
SDValue Src = Op.getOperand(0);
APInt DemandedSrcBits = DemandedBits.reverseBits();
@@ -1748,6 +1768,17 @@ bool TargetLowering::SimplifyDemandedBits(
Known.Zero = Known2.Zero.byteSwap();
break;
}
+ case ISD::CTPOP: {
+ // If only 1 bit is demanded, replace with PARITY as long as we're before
+ // op legalization.
+ // FIXME: Limit to scalars for now.
+ if (DemandedBits.isOneValue() && !TLO.LegalOps && !VT.isVector())
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
+ Op.getOperand(0)));
+
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+ break;
+ }
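The CTPOP case rests on the identity that the low bit of a population count is the value's parity, so when only bit 0 is demanded, ctpop(x) can become parity(x). In plain C++:

// parity(x) == ctpop(x) & 1: the fold above is only about the low bit.
// (__builtin_popcount is the GCC/Clang builtin, used here for illustration.)
static unsigned parityViaPopcount(uint32_t X) {
  return __builtin_popcount(X) & 1u;
}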
case ISD::SIGN_EXTEND_INREG: {
SDValue Op0 = Op.getOperand(0);
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
@@ -1858,6 +1889,11 @@ bool TargetLowering::SimplifyDemandedBits(
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
assert(Known.getBitWidth() == InBits && "Src width has changed?");
Known = Known.zext(BitWidth);
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
+ Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
break;
}
case ISD::SIGN_EXTEND:
@@ -1906,6 +1942,11 @@ bool TargetLowering::SimplifyDemandedBits(
if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
}
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
+ Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
break;
}
case ISD::ANY_EXTEND:
@@ -1945,7 +1986,8 @@ bool TargetLowering::SimplifyDemandedBits(
// zero/one bits live out.
unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
APInt TruncMask = DemandedBits.zext(OperandBitWidth);
- if (SimplifyDemandedBits(Src, TruncMask, Known, TLO, Depth + 1))
+ if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
+ Depth + 1))
return true;
Known = Known.trunc(BitWidth);
@@ -1968,9 +2010,9 @@ bool TargetLowering::SimplifyDemandedBits(
// undesirable.
break;
- SDValue ShAmt = Src.getOperand(1);
- auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt);
- if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
+ const APInt *ShAmtC =
+ TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
+ if (!ShAmtC)
break;
uint64_t ShVal = ShAmtC->getZExtValue();
@@ -1982,12 +2024,12 @@ bool TargetLowering::SimplifyDemandedBits(
if (!(HighBits & DemandedBits)) {
// None of the shifted in bits are needed. Add a truncate of the
// shift input, then shift it.
- if (TLO.LegalTypes())
- ShAmt = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
+ SDValue NewShAmt = TLO.DAG.getConstant(
+ ShVal, dl, getShiftAmountTy(VT, DL, TLO.LegalTypes()));
SDValue NewTrunc =
TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
return TLO.CombineTo(
- Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, ShAmt));
+ Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
}
break;
}
@@ -2012,10 +2054,14 @@ bool TargetLowering::SimplifyDemandedBits(
case ISD::EXTRACT_VECTOR_ELT: {
SDValue Src = Op.getOperand(0);
SDValue Idx = Op.getOperand(1);
- unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
unsigned EltBitWidth = Src.getScalarValueSizeInBits();
+ if (SrcEltCnt.isScalable())
+ return false;
+
// Demand the bits from every vector element without a constant index.
+ unsigned NumSrcElts = SrcEltCnt.getFixedValue();
APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
if (CIdx->getAPIntValue().ult(NumSrcElts))
@@ -2229,9 +2275,13 @@ bool TargetLowering::SimplifyDemandedBits(
if (C->isOpaque())
return false;
}
- // TODO: Handle float bits as well.
if (VT.isInteger())
return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
+ if (VT.isFloatingPoint())
+ return TLO.CombineTo(
+ Op,
+ TLO.DAG.getConstantFP(
+ APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
}
return false;
@@ -2593,13 +2643,9 @@ bool TargetLowering::SimplifyDemandedVectorElts(
KnownZero, TLO, Depth + 1))
return true;
- KnownUndef.clearBit(Idx);
- if (Scl.isUndef())
- KnownUndef.setBit(Idx);
+ KnownUndef.setBitVal(Idx, Scl.isUndef());
- KnownZero.clearBit(Idx);
- if (isNullConstant(Scl) || isNullFPConstant(Scl))
- KnownZero.setBit(Idx);
+ KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
break;
}
@@ -3347,6 +3393,74 @@ SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
return DAG.getSetCC(DL, VT, X, YShl1, Cond);
}
+static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
+ SDValue N0, const APInt &C1,
+ ISD::CondCode Cond, const SDLoc &dl,
+ SelectionDAG &DAG) {
+ // Look through truncs that don't change the value of a ctpop.
+ // FIXME: Add vector support? Need to be careful with setcc result type below.
+ SDValue CTPOP = N0;
+ if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
+ N0.getScalarValueSizeInBits() > Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
+ CTPOP = N0.getOperand(0);
+
+ if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
+ return SDValue();
+
+ EVT CTVT = CTPOP.getValueType();
+ SDValue CTOp = CTPOP.getOperand(0);
+
+ // If this is a vector CTPOP, keep the CTPOP if it is legal.
+ // TODO: Should we check if CTPOP is legal (or custom) for scalars?
+ if (VT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT))
+ return SDValue();
+
+ // (ctpop x) u< 2 -> (x & x-1) == 0
+ // (ctpop x) u> 1 -> (x & x-1) != 0
+ if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
+ unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
+ if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
+ return SDValue();
+ if (C1 == 0 && (Cond == ISD::SETULT))
+ return SDValue(); // This is handled elsewhere.
+
+ unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
+
+ SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
+ SDValue Result = CTOp;
+ for (unsigned i = 0; i < Passes; i++) {
+ SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
+ Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
+ }
+ ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
+ return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
+ }
+
+ // If ctpop is not supported, expand a power-of-2 comparison based on it.
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
+ // For scalars, keep CTPOP if it is legal or custom.
+ if (!VT.isVector() && TLI.isOperationLegalOrCustom(ISD::CTPOP, CTVT))
+ return SDValue();
+ // This is based on X86's custom lowering for CTPOP which produces more
+ // instructions than the expansion here.
+
+ // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
+ // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
+ SDValue Zero = DAG.getConstant(0, dl, CTVT);
+ SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
+ assert(CTVT.isInteger());
+ ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
+ SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
+ SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
+ SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
+ SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
+ unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
+ return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
+ }
+
+ return SDValue();
+}
+
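Both branches of the helper rest on the clear-lowest-set-bit identity: x & (x - 1) removes exactly one set bit, so applying it k times compares the population count against k. A brute-force check of the two rewrites named in the comments above (C++20 for std::popcount):

    #include <bit>
    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x = 0; x < (1u << 16); ++x) {
        // (ctpop x) u< 2 -> (x & x-1) == 0
        assert((std::popcount(x) < 2) == ((x & (x - 1)) == 0));
        // (ctpop x) == 1 -> (x != 0) && ((x & x-1) == 0)
        assert((std::popcount(x) == 1) == (x != 0 && (x & (x - 1)) == 0));
      }
    }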
/// Try to simplify a setcc built with the specified operands and cc. If it is
/// unable to simplify it, return a null SDValue.
SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
@@ -3363,8 +3477,11 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// Ensure that the constant occurs on the RHS and fold constant comparisons.
// TODO: Handle non-splat vector constants. All undef causes trouble.
+ // FIXME: We can't yet fold constant scalable vector splats, so avoid an
+ // infinite loop here when we encounter one.
ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
if (isConstOrConstSplat(N0) &&
+ (!OpVT.isScalableVector() || !isConstOrConstSplat(N1)) &&
(DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
@@ -3376,75 +3493,46 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) &&
(DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
- DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N1, N0 } ) &&
- !DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N0, N1 } ))
+ DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
+ !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
- if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ if (auto *N1C = isConstOrConstSplat(N1)) {
const APInt &C1 = N1C->getAPIntValue();
+ // Optimize some CTPOP cases.
+ if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
+ return V;
+
// If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
// equality comparison, then we're just comparing whether X itself is
// zero.
if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
N0.getOperand(0).getOpcode() == ISD::CTLZ &&
- N0.getOperand(1).getOpcode() == ISD::Constant) {
- const APInt &ShAmt = N0.getConstantOperandAPInt(1);
- if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
- ShAmt == Log2_32(N0.getValueSizeInBits())) {
- if ((C1 == 0) == (Cond == ISD::SETEQ)) {
- // (srl (ctlz x), 5) == 0 -> X != 0
- // (srl (ctlz x), 5) != 1 -> X != 0
- Cond = ISD::SETNE;
- } else {
- // (srl (ctlz x), 5) != 0 -> X == 0
- // (srl (ctlz x), 5) == 1 -> X == 0
- Cond = ISD::SETEQ;
+ isPowerOf2_32(N0.getScalarValueSizeInBits())) {
+ if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
+ if ((C1 == 0) == (Cond == ISD::SETEQ)) {
+ // (srl (ctlz x), 5) == 0 -> X != 0
+ // (srl (ctlz x), 5) != 1 -> X != 0
+ Cond = ISD::SETNE;
+ } else {
+ // (srl (ctlz x), 5) != 0 -> X == 0
+ // (srl (ctlz x), 5) == 1 -> X == 0
+ Cond = ISD::SETEQ;
+ }
+ SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
+ Cond);
}
- SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
- return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
- Zero, Cond);
}
}
+ }
- SDValue CTPOP = N0;
- // Look through truncs that don't change the value of a ctpop.
- if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE)
- CTPOP = N0.getOperand(0);
-
- if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
- (N0 == CTPOP ||
- N0.getValueSizeInBits() > Log2_32_Ceil(CTPOP.getValueSizeInBits()))) {
- EVT CTVT = CTPOP.getValueType();
- SDValue CTOp = CTPOP.getOperand(0);
-
- // (ctpop x) u< 2 -> (x & x-1) == 0
- // (ctpop x) u> 1 -> (x & x-1) != 0
- if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){
- SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
- SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
- SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
- ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
- return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC);
- }
-
- // If ctpop is not supported, expand a power-of-2 comparison based on it.
- if (C1 == 1 && !isOperationLegalOrCustom(ISD::CTPOP, CTVT) &&
- (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
- // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
- // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
- SDValue Zero = DAG.getConstant(0, dl, CTVT);
- SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
- assert(CTVT.isInteger());
- ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
- SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
- SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
- SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
- SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
- unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
- return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
- }
- }
+ // FIXME: Support vectors.
+ if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ const APInt &C1 = N1C->getAPIntValue();
// (zext x) == C --> x == (trunc C)
// (sext x) == C --> x == (trunc C)
@@ -3578,11 +3666,12 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
SDValue Ptr = Lod->getBasePtr();
if (bestOffset != 0)
- Ptr = DAG.getMemBasePlusOffset(Ptr, bestOffset, dl);
- unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
- SDValue NewLoad = DAG.getLoad(
- newVT, dl, Lod->getChain(), Ptr,
- Lod->getPointerInfo().getWithOffset(bestOffset), NewAlign);
+ Ptr =
+ DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(bestOffset), dl);
+ SDValue NewLoad =
+ DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
+ Lod->getPointerInfo().getWithOffset(bestOffset),
+ Lod->getOriginalAlign());
return DAG.getSetCC(dl, VT,
DAG.getNode(ISD::AND, dl, newVT, NewLoad,
DAG.getConstant(bestMask.trunc(bestWidth),
@@ -3647,7 +3736,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
break; // todo, be more careful with signed comparisons
}
} else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
- (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
+ OpVT)) {
EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
EVT ExtDstTy = N0.getValueType();
@@ -3656,26 +3747,18 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// If the constant doesn't fit into the number of bits for the source of
// the sign extension, it is impossible for both sides to be equal.
if (C1.getMinSignedBits() > ExtSrcTyBits)
- return DAG.getConstant(Cond == ISD::SETNE, dl, VT);
+ return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
- SDValue ZextOp;
- EVT Op0Ty = N0.getOperand(0).getValueType();
- if (Op0Ty == ExtSrcTy) {
- ZextOp = N0.getOperand(0);
- } else {
- APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
- ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
- DAG.getConstant(Imm, dl, Op0Ty));
- }
+ assert(ExtDstTy == N0.getOperand(0).getValueType() &&
+ ExtDstTy != ExtSrcTy && "Unexpected types!");
+ APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
+ SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
+ DAG.getConstant(Imm, dl, ExtDstTy));
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(ZextOp.getNode());
// Otherwise, make this a use of a zext.
return DAG.getSetCC(dl, VT, ZextOp,
- DAG.getConstant(C1 & APInt::getLowBitsSet(
- ExtDstTyBits,
- ExtSrcTyBits),
- dl, ExtDstTy),
- Cond);
+ DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
} else if ((N1C->isNullValue() || N1C->isOne()) &&
(Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
// SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
@@ -3699,8 +3782,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
(N0.getOpcode() == ISD::AND &&
N0.getOperand(0).getOpcode() == ISD::XOR &&
N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
- isa<ConstantSDNode>(N0.getOperand(1)) &&
- cast<ConstantSDNode>(N0.getOperand(1))->isOne()) {
+ isOneConstant(N0.getOperand(1))) {
// If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
// can only do this if the top bits are known zero.
unsigned BitWidth = N0.getValueSizeInBits();
@@ -3744,9 +3826,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
}
}
- if (Op0.getOpcode() == ISD::AND &&
- isa<ConstantSDNode>(Op0.getOperand(1)) &&
- cast<ConstantSDNode>(Op0.getOperand(1))->isOne()) {
+ if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
// If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
if (Op0.getValueType().bitsGT(VT))
Op0 = DAG.getNode(ISD::AND, dl, VT,
@@ -3884,6 +3964,67 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
VT, N0, N1, Cond, DCI, dl))
return CC;
+
+ // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
+ // For example, when high 32-bits of i64 X are known clear:
+ // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
+ // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
+ bool CmpZero = N1C->getAPIntValue().isNullValue();
+ bool CmpNegOne = N1C->getAPIntValue().isAllOnesValue();
+ if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
+ // Match or(lo,shl(hi,bw/2)) pattern.
+ auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
+ unsigned EltBits = V.getScalarValueSizeInBits();
+ if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
+ return false;
+ SDValue LHS = V.getOperand(0);
+ SDValue RHS = V.getOperand(1);
+ APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
+ // The unshifted element must have zero upper bits.
+ if (RHS.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(RHS.getOperand(1)) &&
+ RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
+ DAG.MaskedValueIsZero(LHS, HiBits)) {
+ Lo = LHS;
+ Hi = RHS.getOperand(0);
+ return true;
+ }
+ if (LHS.getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(LHS.getOperand(1)) &&
+ LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
+ DAG.MaskedValueIsZero(RHS, HiBits)) {
+ Lo = RHS;
+ Hi = LHS.getOperand(0);
+ return true;
+ }
+ return false;
+ };
+
+ auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
+ unsigned EltBits = N0.getScalarValueSizeInBits();
+ unsigned HalfBits = EltBits / 2;
+ APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
+ SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
+ SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
+ SDValue NewN0 =
+ DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
+ SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
+ return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
+ };
+
+ SDValue Lo, Hi;
+ if (IsConcat(N0, Lo, Hi))
+ return MergeConcat(Lo, Hi);
+
+ if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
+ SDValue Lo0, Lo1, Hi0, Hi1;
+ if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
+ IsConcat(N0.getOperand(1), Lo1, Hi1)) {
+ return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
+ DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
+ }
+ }
+ }
}
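The concat fold is valid because Lo is required to have a clear upper half (the MaskedValueIsZero check), so OR-ing Lo with the masked Hi half preserves the all-zero / all-ones property of the concatenated value. A brute-force model at a small width, an 8-bit value built from 4-bit halves:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned lo = 0; lo < 16; ++lo)     // low half, upper bits known zero
        for (unsigned hi = 0; hi < 16; ++hi) { // half that gets shifted high
          uint8_t concat = uint8_t(lo | (hi << 4));
          // all bits clear: (X | (Y<<4)) == 0  -->  (X | Y) == 0
          assert((concat == 0x00) == ((lo | hi) == 0));
          // all bits set: (X | (Y<<4)) == -1  -->  (X & Y) == low-half mask,
          // after Hi is masked to the low half as MergeConcat does.
          assert((concat == 0xFF) == (((lo & hi) & 0xF) == 0xF));
        }
    }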
// If we have "setcc X, C0", check to see if we can shrink the immediate
@@ -3891,20 +4032,20 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// TODO: Support this for vectors after legalize ops.
if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
// SETUGT X, SINTMAX -> SETLT X, 0
- if (Cond == ISD::SETUGT &&
- C1 == APInt::getSignedMaxValue(OperandBitSize))
+ // SETUGE X, SINTMIN -> SETLT X, 0
+ if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
+ (Cond == ISD::SETUGE && C1.isMinSignedValue()))
return DAG.getSetCC(dl, VT, N0,
DAG.getConstant(0, dl, N1.getValueType()),
ISD::SETLT);
// SETULT X, SINTMIN -> SETGT X, -1
- if (Cond == ISD::SETULT &&
- C1 == APInt::getSignedMinValue(OperandBitSize)) {
- SDValue ConstMinusOne =
- DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl,
- N1.getValueType());
- return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
- }
+ // SETULE X, SINTMAX -> SETGT X, -1
+ if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
+ (Cond == ISD::SETULE && C1.isMaxSignedValue()))
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getAllOnesConstant(dl, N1.getValueType()),
+ ISD::SETGT);
}
}
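These four folds are the standard unsigned/signed boundary identities: comparing unsigned against SINTMAX/SINTMIN is really a test of the sign bit. A brute-force check at 8 bits (two's-complement conversion assumed):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int v = 0; v < 256; ++v) {
        uint8_t u = uint8_t(v);
        int8_t s = int8_t(u);
        assert((u > 0x7F) == (s < 0));   // SETUGT X, SINTMAX -> SETLT X, 0
        assert((u >= 0x80) == (s < 0));  // SETUGE X, SINTMIN -> SETLT X, 0
        assert((u < 0x80) == (s > -1));  // SETULT X, SINTMIN -> SETGT X, -1
        assert((u <= 0x7F) == (s > -1)); // SETULE X, SINTMAX -> SETGT X, -1
      }
    }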
@@ -3915,8 +4056,13 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
const APInt &C1 = N1C->getAPIntValue();
EVT ShValTy = N0.getValueType();
- // Fold bit comparisons when we can.
- if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ // Fold bit comparisons when we can. This will result in an
+ // incorrect value when boolean false is negative one, unless
+ // the bitsize is 1, in which case the false value is the same
+ // in practice regardless of the representation.
+ if ((VT.getSizeInBits() == 1 ||
+ getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
(VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
N0.getOpcode() == ISD::AND) {
if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
@@ -4312,8 +4458,8 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
}
SDValue TargetLowering::LowerAsmOutputForConstraint(
- SDValue &Chain, SDValue &Flag, SDLoc DL, const AsmOperandInfo &OpInfo,
- SelectionDAG &DAG) const {
+ SDValue &Chain, SDValue &Flag, const SDLoc &DL,
+ const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
return SDValue();
}
@@ -4887,9 +5033,15 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
return SDValue();
SDValue Shift, Factor;
- if (VT.isVector()) {
+ if (VT.isFixedLengthVector()) {
Shift = DAG.getBuildVector(ShVT, dl, Shifts);
Factor = DAG.getBuildVector(VT, dl, Factors);
+ } else if (VT.isScalableVector()) {
+ assert(Shifts.size() == 1 && Factors.size() == 1 &&
+ "Expected matchUnaryPredicate to return one element for scalable "
+ "vectors");
+ Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
+ Factor = DAG.getSplatVector(VT, dl, Factors[0]);
} else {
Shift = Shifts[0];
Factor = Factors[0];
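The Shifts/Factors splatted here come from the exact-division rewrite BuildExactSDIV implements: shift out the divisor's trailing zero bits (an exact dividend has them clear too), then multiply by the modular inverse of the remaining odd factor. A scalar sketch for one divisor, assuming two's-complement conversions; the inverse is computed by Newton's method:

    #include <cassert>
    #include <cstdint>

    // Inverse of an odd d modulo 2^32; x = d is already correct to 3 bits
    // (odd^2 == 1 mod 8) and each Newton step doubles the correct bits.
    uint32_t inverseMod2_32(uint32_t d) {
      uint32_t x = d;
      for (int i = 0; i < 5; ++i)
        x *= 2 - d * x;
      return x;
    }

    int main() {
      const int32_t d = 24; // 8 * 3: shift by 3, multiply by inverse of 3
      uint32_t inv = inverseMod2_32(3);
      for (int32_t n = -240; n <= 240; n += d) // exact multiples only
        assert(int32_t(uint32_t(n >> 3) * inv) == n / d);
    }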
@@ -4982,11 +5134,20 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
return SDValue();
SDValue MagicFactor, Factor, Shift, ShiftMask;
- if (VT.isVector()) {
+ if (VT.isFixedLengthVector()) {
MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
Factor = DAG.getBuildVector(VT, dl, Factors);
Shift = DAG.getBuildVector(ShVT, dl, Shifts);
ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
+ } else if (VT.isScalableVector()) {
+ assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
+ Shifts.size() == 1 && ShiftMasks.size() == 1 &&
+ "Expected matchUnaryPredicate to return one element for scalable "
+ "vectors");
+ MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
+ Factor = DAG.getSplatVector(VT, dl, Factors[0]);
+ Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
+ ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
} else {
MagicFactor = MagicFactors[0];
Factor = Factors[0];
@@ -5100,11 +5261,19 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
return SDValue();
SDValue PreShift, PostShift, MagicFactor, NPQFactor;
- if (VT.isVector()) {
+ if (VT.isFixedLengthVector()) {
PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
+ } else if (VT.isScalableVector()) {
+ assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
+ NPQFactors.size() == 1 && PostShifts.size() == 1 &&
+ "Expected matchUnaryPredicate to return one for scalable vectors");
+ PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
+ MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
+ NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
+ PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
} else {
PreShift = PreShifts[0];
MagicFactor = MagicFactors[0];
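The four splatted values implement the classic magic-number unsigned division. A scalar model for one divisor, using the standard u32 constants for dividing by 7, where the magic multiplier needs 33 bits and the NPQ fixup step recovers the lost bit (constants here are illustrative, not taken from the patch):

    #include <cassert>
    #include <cstdint>

    uint32_t udiv7(uint32_t n) {
      uint32_t q = uint32_t((uint64_t(n) * 0x24924925u) >> 32); // mulhu(n, magic)
      uint32_t npq = (n - q) >> 1; // NPQ fixup for the 33-bit multiplier
      return (npq + q) >> 2;       // post-shift
    }

    int main() {
      for (uint64_t n = 0; n <= 0xFFFFFFFFull; n += 0x10003ull)
        assert(udiv7(uint32_t(n)) == uint32_t(n) / 7);
      assert(udiv7(0xFFFFFFFFu) == 0xFFFFFFFFu / 7);
    }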
@@ -5156,8 +5325,10 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
Created.push_back(Q.getNode());
+ EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+
SDValue One = DAG.getConstant(1, dl, VT);
- SDValue IsOne = DAG.getSetCC(dl, VT, N1, One, ISD::SETEQ);
+ SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
return DAG.getSelect(dl, VT, IsOne, N0, Q);
}
@@ -5584,7 +5755,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
return SDValue();
SDValue PVal, AVal, KVal, QVal;
- if (VT.isVector()) {
+ if (VT.isFixedLengthVector()) {
if (HadOneDivisor) {
// Try to turn PAmts into a splat, since we don't care about the values
// that are currently '0'. If we can't, just keep '0'`s.
@@ -5603,6 +5774,15 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
AVal = DAG.getBuildVector(VT, DL, AAmts);
KVal = DAG.getBuildVector(ShVT, DL, KAmts);
QVal = DAG.getBuildVector(VT, DL, QAmts);
+ } else if (VT.isScalableVector()) {
+ assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
+ QAmts.size() == 1 &&
+ "Expected matchUnaryPredicate to return one element for scalable "
+ "vectors");
+ PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
+ AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
+ KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
+ QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
} else {
PVal = PAmts[0];
AVal = AAmts[0];
@@ -5697,6 +5877,28 @@ verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
return false;
}
+SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
+ const DenormalMode &Mode) const {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
+ // Test for denormal inputs to avoid a wrong estimate.
+ if (Mode.Input == DenormalMode::IEEE) {
+ // This is specifically a check for the handling of denormal inputs,
+ // not the result.
+
+ // Test = fabs(X) < SmallestNormal
+ const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
+ APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
+ SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
+ SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
+ return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
+ }
+ // Test = X == 0.0
+ return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
+}
+
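A scalar model of the test this helper builds: with IEEE denormal-input handling the guard is fabs(X) < smallest-normal (which also catches zero), otherwise it degenerates to a plain X == 0.0 check:

    #include <cassert>
    #include <cmath>
    #include <limits>

    bool needsSqrtFixup(double x) {
      // Test = fabs(X) < SmallestNormal, as in the IEEE branch above.
      return std::fabs(x) < std::numeric_limits<double>::min();
    }

    int main() {
      assert(needsSqrtFixup(0.0) && needsSqrtFixup(-0.0));
      assert(needsSqrtFixup(1e-310));  // a denormal input
      assert(!needsSqrtFixup(1e-300)); // a normal input
    }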
SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
bool LegalOps, bool OptForSize,
NegatibleCost &Cost,
@@ -5941,7 +6143,7 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
// Legalization Utilities
//===----------------------------------------------------------------------===//
-bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl,
+bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
SDValue LHS, SDValue RHS,
SmallVectorImpl<SDValue> &Result,
EVT HiLoVT, SelectionDAG &DAG,
@@ -5964,8 +6166,6 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl,
unsigned OuterBitSize = VT.getScalarSizeInBits();
unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
- unsigned LHSSB = DAG.ComputeNumSignBits(LHS);
- unsigned RHSSB = DAG.ComputeNumSignBits(RHS);
// LL, LH, RL, and RH must be either all NULL or all set to a value.
assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
@@ -6014,8 +6214,9 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl,
}
}
- if (!VT.isVector() && Opcode == ISD::MUL && LHSSB > InnerBitSize &&
- RHSSB > InnerBitSize) {
+ if (!VT.isVector() && Opcode == ISD::MUL &&
+ DAG.ComputeNumSignBits(LHS) > InnerBitSize &&
+ DAG.ComputeNumSignBits(RHS) > InnerBitSize) {
// The input values are both sign-extended.
// TODO non-MUL case?
if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
@@ -6129,7 +6330,7 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
SDValue LL, SDValue LH, SDValue RL,
SDValue RH) const {
SmallVector<SDValue, 2> Result;
- bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), N,
+ bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
N->getOperand(0), N->getOperand(1), Result, HiLoVT,
DAG, Kind, LL, LH, RL, RH);
if (Ok) {
@@ -6141,7 +6342,7 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
}
// Check that (every element of) Z is undef or not an exact multiple of BW.
-static bool isNonZeroModBitWidth(SDValue Z, unsigned BW) {
+static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
return ISD::matchUnaryPredicate(
Z,
[=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
@@ -6168,9 +6369,35 @@ bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
EVT ShVT = Z.getValueType();
+ // If a funnel shift in the other direction is more supported, use it.
+ unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
+ if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
+ isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
+ if (isNonZeroModBitWidthOrUndef(Z, BW)) {
+ // fshl X, Y, Z -> fshr X, Y, -Z
+ // fshr X, Y, Z -> fshl X, Y, -Z
+ SDValue Zero = DAG.getConstant(0, DL, ShVT);
+ Z = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Z);
+ } else {
+ // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
+ // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
+ SDValue One = DAG.getConstant(1, DL, ShVT);
+ if (IsFSHL) {
+ Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
+ X = DAG.getNode(ISD::SRL, DL, VT, X, One);
+ } else {
+ X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
+ Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
+ }
+ Z = DAG.getNOT(DL, Z, ShVT);
+ }
+ Result = DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
+ return true;
+ }
+
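The two rewrites above can be checked by brute force against the ISD semantics (shift amounts taken modulo the bit width); an 8-bit model:

    #include <cassert>
    #include <cstdint>

    uint8_t fshl(uint8_t x, uint8_t y, unsigned z) {
      unsigned s = z % 8;
      return s ? uint8_t((x << s) | (y >> (8 - s))) : x;
    }
    uint8_t fshr(uint8_t x, uint8_t y, unsigned z) {
      unsigned s = z % 8;
      return s ? uint8_t((x << (8 - s)) | (y >> s)) : y;
    }

    int main() {
      for (unsigned x = 0; x < 256; ++x)
        for (unsigned y = 0; y < 256; ++y)
          for (unsigned z = 0; z < 16; ++z) {
            // fshl X, Y, Z -> fshr X, Y, -Z   (Z not a multiple of BW)
            if (z % 8 != 0)
              assert(fshl(x, y, z) == fshr(x, y, 0u - z));
            // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
            assert(fshl(x, y, z) == fshr(x >> 1, fshr(x, y, 1), ~z));
          }
    }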
SDValue ShX, ShY;
SDValue ShAmt, InvShAmt;
- if (isNonZeroModBitWidth(Z, BW)) {
+ if (isNonZeroModBitWidthOrUndef(Z, BW)) {
// fshl: X << C | Y >> (BW - C)
// fshr: X << (BW - C) | Y >> C
// where C = Z % BW is not zero
@@ -6210,8 +6437,8 @@ bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
}
// TODO: Merge with expandFunnelShift.
-bool TargetLowering::expandROT(SDNode *Node, SDValue &Result,
- SelectionDAG &DAG) const {
+bool TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
+ SDValue &Result, SelectionDAG &DAG) const {
EVT VT = Node->getValueType(0);
unsigned EltSizeInBits = VT.getScalarSizeInBits();
bool IsLeft = Node->getOpcode() == ISD::ROTL;
@@ -6222,36 +6449,47 @@ bool TargetLowering::expandROT(SDNode *Node, SDValue &Result,
EVT ShVT = Op1.getValueType();
SDValue Zero = DAG.getConstant(0, DL, ShVT);
- assert(isPowerOf2_32(EltSizeInBits) && EltSizeInBits > 1 &&
- "Expecting the type bitwidth to be a power of 2");
-
// If a rotate in the other direction is supported, use it.
unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
- if (isOperationLegalOrCustom(RevRot, VT)) {
+ if (isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
Result = DAG.getNode(RevRot, DL, VT, Op0, Sub);
return true;
}
- if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
- !isOperationLegalOrCustom(ISD::SRL, VT) ||
- !isOperationLegalOrCustom(ISD::SUB, VT) ||
- !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
- !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
+ if (!AllowVectorOps && VT.isVector() &&
+ (!isOperationLegalOrCustom(ISD::SHL, VT) ||
+ !isOperationLegalOrCustom(ISD::SRL, VT) ||
+ !isOperationLegalOrCustom(ISD::SUB, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
+ !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
return false;
- // Otherwise,
- // (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and -c, w-1)))
- // (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and -c, w-1)))
- //
unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
- SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
- SDValue And0 = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
- SDValue And1 = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
- Result = DAG.getNode(ISD::OR, DL, VT, DAG.getNode(ShOpc, DL, VT, Op0, And0),
- DAG.getNode(HsOpc, DL, VT, Op0, And1));
+ SDValue ShVal;
+ SDValue HsVal;
+ if (isPowerOf2_32(EltSizeInBits)) {
+ // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
+ // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
+ SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
+ SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
+ ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
+ SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
+ HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
+ } else {
+ // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
+ // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
+ SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
+ SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
+ ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
+ SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
+ SDValue One = DAG.getConstant(1, DL, ShVT);
+ HsVal =
+ DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
+ }
+ Result = DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
return true;
}
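For non-power-of-2 widths the amount can no longer be masked with w-1, so the expansion reduces it modulo w and splits the opposite-direction shift into a shift by one followed by a shift by (w-1-c), keeping every shift amount in range even when c % w == 0. A model at a hypothetical 12-bit element width:

    #include <cassert>
    #include <cstdint>

    const unsigned W = 12;               // a non-power-of-2 element width
    const uint32_t MASK = (1u << W) - 1;

    uint32_t rotlRef(uint32_t x, unsigned c) {
      unsigned s = c % W;
      return ((x << s) | (x >> ((W - s) % W))) & MASK;
    }

    uint32_t rotlExpanded(uint32_t x, unsigned c) {
      // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
      unsigned s = c % W;
      return ((x << s) | ((x >> 1) >> (W - 1 - s))) & MASK;
    }

    int main() {
      for (uint32_t x = 0; x <= MASK; ++x)
        for (unsigned c = 0; c < 3 * W; ++c)
          assert(rotlRef(x, c) == rotlExpanded(x, c));
    }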
@@ -6270,7 +6508,7 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
if (Node->isStrictFPOpcode())
// When a NaN is converted to an integer a trap is allowed. We can't
// use this expansion here because it would eliminate that trap. Other
- // traps are also allowed and cannot be eliminated. See
+ // traps are also allowed and cannot be eliminated. See
// IEEE 754-2008 sec 5.8.
return false;
@@ -6341,7 +6579,7 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
// Only expand vector types if we have the appropriate vector bit operations.
- unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
+ unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
ISD::FP_TO_SINT;
if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
!isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
@@ -6356,14 +6594,19 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
if (APFloat::opOverflow &
APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
if (Node->isStrictFPOpcode()) {
- Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
- { Node->getOperand(0), Src });
+ Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
+ { Node->getOperand(0), Src });
Chain = Result.getValue(1);
} else
Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
return true;
}
+ // Don't expand it if there isn't a cheap fsub instruction.
+ if (!isOperationLegalOrCustom(
+ Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
+ return false;
+
SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
SDValue Sel;
@@ -6395,9 +6638,9 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
DAG.getConstant(SignMask, dl, DstVT));
SDValue SInt;
if (Node->isStrictFPOpcode()) {
- SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
+ SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
{ Chain, Src, FltOfs });
- SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
+ SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
{ Val.getValue(1), Val });
Chain = SInt.getValue(1);
} else {
@@ -6426,8 +6669,13 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
SDValue &Chain,
SelectionDAG &DAG) const {
- unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
- SDValue Src = Node->getOperand(OpNo);
+ // This transform is not correct for converting 0 when the rounding mode is
+ // set to round toward negative infinity, which would produce -0.0. So
+ // disable it under strictfp.
+ if (Node->isStrictFPOpcode())
+ return false;
+
+ SDValue Src = Node->getOperand(0);
EVT SrcVT = Src.getValueType();
EVT DstVT = Node->getValueType(0);
@@ -6446,9 +6694,10 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());
// Implementation of unsigned i64 to f64 following the algorithm in
- // __floatundidf in compiler_rt. This implementation has the advantage
- // of performing rounding correctly, both in the default rounding mode
- // and in all alternate rounding modes.
+ // __floatundidf in compiler_rt. This implementation performs rounding
+ // correctly in all rounding modes with the exception of converting 0
+ // when rounding toward negative infinity. In that case the fsub will produce
+ // -0.0. This will be added to +0.0 and produce -0.0, which is incorrect.
SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT);
@@ -6462,18 +6711,9 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
- if (Node->isStrictFPOpcode()) {
- SDValue HiSub =
- DAG.getNode(ISD::STRICT_FSUB, dl, {DstVT, MVT::Other},
- {Node->getOperand(0), HiFlt, TwoP84PlusTwoP52});
- Result = DAG.getNode(ISD::STRICT_FADD, dl, {DstVT, MVT::Other},
- {HiSub.getValue(1), LoFlt, HiSub});
- Chain = Result.getValue(1);
- } else {
- SDValue HiSub =
- DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
- Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
- }
+ SDValue HiSub =
+ DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
+ Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
return true;
}
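A scalar model of that two-part expansion with the constants spelled out; hi - bias is exact, so the only rounding happens in the final add, matching a correctly rounded conversion (round-to-nearest assumed):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    double u64ToF64(uint64_t x) {
      uint64_t loBits = (x & 0xFFFFFFFFu) | 0x4330000000000000ULL; // 2^52 + lo32
      uint64_t hiBits = (x >> 32) | 0x4530000000000000ULL; // 2^84 + hi32 * 2^32
      double lo, hi, bias;
      std::memcpy(&lo, &loBits, 8);
      std::memcpy(&hi, &hiBits, 8);
      const uint64_t biasBits = 0x4530000000100000ULL;     // 2^84 + 2^52
      std::memcpy(&bias, &biasBits, 8);
      return (hi - bias) + lo;
    }

    int main() {
      for (uint64_t x : {uint64_t(0), uint64_t(1), uint64_t(0xFFFFFFFF),
                         uint64_t(0x123456789ABCDEF0), ~uint64_t(0)})
        assert(u64ToF64(x) == double(x));
    }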
@@ -6483,6 +6723,11 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
EVT VT = Node->getValueType(0);
+
+ if (VT.isScalableVector())
+ report_fatal_error(
+ "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
+
if (isOperationLegalOrCustom(NewOp, VT)) {
SDValue Quiet0 = Node->getOperand(0);
SDValue Quiet1 = Node->getOperand(1);
@@ -6706,23 +6951,58 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
}
bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG, bool IsNegative) const {
SDLoc dl(N);
EVT VT = N->getValueType(0);
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
SDValue Op = N->getOperand(0);
+ // abs(x) -> smax(x,sub(0,x))
+ if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
+ isOperationLegal(ISD::SMAX, VT)) {
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ Result = DAG.getNode(ISD::SMAX, dl, VT, Op,
+ DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
+ return true;
+ }
+
+ // abs(x) -> umin(x,sub(0,x))
+ if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
+ isOperationLegal(ISD::UMIN, VT)) {
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ Result = DAG.getNode(ISD::UMIN, dl, VT, Op,
+ DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
+ return true;
+ }
+
+ // 0 - abs(x) -> smin(x, sub(0,x))
+ if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
+ isOperationLegal(ISD::SMIN, VT)) {
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ Result = DAG.getNode(ISD::SMIN, dl, VT, Op,
+ DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
+ return true;
+ }
+
// Only expand vector types if we have the appropriate vector operations.
- if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SRA, VT) ||
- !isOperationLegalOrCustom(ISD::ADD, VT) ||
- !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
+ if (VT.isVector() &&
+ (!isOperationLegalOrCustom(ISD::SRA, VT) ||
+ (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
+ (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
+ !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
return false;
SDValue Shift =
DAG.getNode(ISD::SRA, dl, VT, Op,
DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
- SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
- Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
+ if (!IsNegative) {
+ SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
+ Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
+ } else {
+ // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
+ SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
+ Result = DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
+ }
return true;
}
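The generic path is the classic sign-mask trick: Y = X >>s (BW-1) is 0 or all-ones, (X + Y) ^ Y is |X| (wrapping at the minimum value), and Y - (X ^ Y) is -|X|. A brute-force check at 8 bits (two's-complement conversions assumed):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned v = 0; v < 256; ++v) {
        uint8_t x = uint8_t(v);
        uint8_t y = uint8_t(int8_t(x) >> 7);              // sra(x, bw-1)
        uint8_t ref = (x & 0x80) ? uint8_t(0 - x) : x;    // wrapping |x|
        assert(uint8_t(uint8_t(x + y) ^ y) == ref);       // abs: add then xor
        assert(uint8_t(y - (x ^ y)) == uint8_t(0 - ref)); // the IsNegative form
      }
    }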
@@ -6736,6 +7016,9 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
EVT DstVT = LD->getValueType(0);
ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (SrcVT.isScalableVector())
+ report_fatal_error("Cannot scalarize scalable vector loads");
+
unsigned NumElem = SrcVT.getVectorNumElements();
EVT SrcEltVT = SrcVT.getScalarType();
@@ -6762,7 +7045,7 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
// the codegen worse.
SDValue Load =
DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
- LD->getPointerInfo(), SrcIntVT, LD->getAlignment(),
+ LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
LD->getMemOperand()->getFlags(), LD->getAAInfo());
SmallVector<SDValue, 8> Vals;
@@ -6799,10 +7082,10 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
SDValue ScalarLoad =
DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
LD->getPointerInfo().getWithOffset(Idx * Stride),
- SrcEltVT, MinAlign(LD->getAlignment(), Idx * Stride),
+ SrcEltVT, LD->getOriginalAlign(),
LD->getMemOperand()->getFlags(), LD->getAAInfo());
- BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, Stride);
+ BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::Fixed(Stride));
Vals.push_back(ScalarLoad.getValue(0));
LoadChains.push_back(ScalarLoad.getValue(1));
@@ -6823,6 +7106,9 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
SDValue Value = ST->getValue();
EVT StVT = ST->getMemoryVT();
+ if (StVT.isScalableVector())
+ report_fatal_error("Cannot scalarize scalable vector stores");
+
// The type of the data we want to save
EVT RegVT = Value.getValueType();
EVT RegSclVT = RegVT.getScalarType();
@@ -6859,7 +7145,7 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
}
return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
- ST->getAlignment(), ST->getMemOperand()->getFlags(),
+ ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
ST->getAAInfo());
}
@@ -6873,13 +7159,14 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
DAG.getVectorIdxConstant(Idx, SL));
- SDValue Ptr = DAG.getObjectPtrOffset(SL, BasePtr, Idx * Stride);
+ SDValue Ptr =
+ DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::Fixed(Idx * Stride));
// This scalar TruncStore may be illegal, but we legalize it later.
SDValue Store = DAG.getTruncStore(
Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
- MemSclVT, MinAlign(ST->getAlignment(), Idx * Stride),
- ST->getMemOperand()->getFlags(), ST->getAAInfo());
+ MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
+ ST->getAAInfo());
Stores.push_back(Store);
}
@@ -6944,7 +7231,7 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
// Load one integer register's worth from the original location.
SDValue Load = DAG.getLoad(
RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
- MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(),
+ LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
LD->getAAInfo());
// Follow the load with a store to the stack slot. Remember the store.
Stores.push_back(DAG.getStore(
@@ -6963,8 +7250,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
SDValue Load =
DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(Offset), MemVT,
- MinAlign(LD->getAlignment(), Offset),
- LD->getMemOperand()->getFlags(), LD->getAAInfo());
+ LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
+ LD->getAAInfo());
// Follow the load with a store to the stack slot. Remember the store.
// On big-endian machines this requires a truncating store to ensure
// that the bits end up in the right place.
@@ -6994,7 +7281,7 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
NumBits >>= 1;
- unsigned Alignment = LD->getAlignment();
+ Align Alignment = LD->getOriginalAlign();
unsigned IncrementSize = NumBits / 8;
ISD::LoadExtType HiExtType = LD->getExtensionType();
@@ -7009,21 +7296,21 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
LD->getAAInfo());
- Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
- NewLoadedVT, MinAlign(Alignment, IncrementSize),
- LD->getMemOperand()->getFlags(), LD->getAAInfo());
+ NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
+ LD->getAAInfo());
} else {
Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
LD->getAAInfo());
- Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
- NewLoadedVT, MinAlign(Alignment, IncrementSize),
- LD->getMemOperand()->getFlags(), LD->getAAInfo());
+ NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
+ LD->getAAInfo());
}
// aggregate the two parts
@@ -7047,7 +7334,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
SDValue Ptr = ST->getBasePtr();
SDValue Val = ST->getValue();
EVT VT = Val.getValueType();
- int Alignment = ST->getAlignment();
+ Align Alignment = ST->getOriginalAlign();
auto &MF = DAG.getMachineFunction();
EVT StoreMemVT = ST->getMemoryVT();
@@ -7104,7 +7391,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
// Store it to the final location. Remember the store.
Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
ST->getPointerInfo().getWithOffset(Offset),
- MinAlign(ST->getAlignment(), Offset),
+ ST->getOriginalAlign(),
ST->getMemOperand()->getFlags()));
// Increment the pointers.
Offset += RegBytes;
@@ -7126,7 +7413,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
Stores.push_back(
DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
- MinAlign(ST->getAlignment(), Offset),
+ ST->getOriginalAlign(),
ST->getMemOperand()->getFlags(), ST->getAAInfo()));
// The order of the stores doesn't matter - say it with a TokenFactor.
SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
@@ -7137,8 +7424,8 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
"Unaligned store of unknown type.");
// Get the half-size VT
EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
- int NumBits = NewStoredVT.getSizeInBits();
- int IncrementSize = NumBits / 8;
+ unsigned NumBits = NewStoredVT.getFixedSizeInBits();
+ unsigned IncrementSize = NumBits / 8;
// Divide the stored value in two parts.
SDValue ShiftAmount = DAG.getConstant(
@@ -7153,8 +7440,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
ST->getMemOperand()->getFlags());
- Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
- Alignment = MinAlign(Alignment, IncrementSize);
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
Store2 = DAG.getTruncStore(
Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
@@ -7173,9 +7459,12 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
SDValue Increment;
EVT AddrVT = Addr.getValueType();
EVT MaskVT = Mask.getValueType();
- assert(DataVT.getVectorNumElements() == MaskVT.getVectorNumElements() &&
+ assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
"Incompatible types of Data and Mask");
if (IsCompressedMemory) {
+ if (DataVT.isScalableVector())
+ report_fatal_error(
+ "Cannot currently handle compressed memory with scalable vectors");
// Incrementing the pointer according to number of '1's in the mask.
EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
@@ -7191,6 +7480,10 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
AddrVT);
Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
+ } else if (DataVT.isScalableVector()) {
+ Increment = DAG.getVScale(DL, AddrVT,
+ APInt(AddrVT.getFixedSizeInBits(),
+ DataVT.getStoreSize().getKnownMinSize()));
} else
Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
@@ -7201,16 +7494,26 @@ static SDValue clampDynamicVectorIndex(SelectionDAG &DAG,
SDValue Idx,
EVT VecVT,
const SDLoc &dl) {
- if (isa<ConstantSDNode>(Idx))
+ if (!VecVT.isScalableVector() && isa<ConstantSDNode>(Idx))
return Idx;
EVT IdxVT = Idx.getValueType();
- unsigned NElts = VecVT.getVectorNumElements();
- if (isPowerOf2_32(NElts)) {
- APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(),
- Log2_32(NElts));
- return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
- DAG.getConstant(Imm, dl, IdxVT));
+ unsigned NElts = VecVT.getVectorMinNumElements();
+ if (VecVT.isScalableVector()) {
+ SDValue VS = DAG.getVScale(dl, IdxVT,
+ APInt(IdxVT.getFixedSizeInBits(),
+ NElts));
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, IdxVT, VS,
+ DAG.getConstant(1, dl, IdxVT));
+
+ return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
+ } else {
+ if (isPowerOf2_32(NElts)) {
+ APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(),
+ Log2_32(NElts));
+ return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
+ DAG.getConstant(Imm, dl, IdxVT));
+ }
}
return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
@@ -7227,8 +7530,8 @@ SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
EVT EltVT = VecVT.getVectorElementType();
// Calculate the element offset and add it to the pointer.
- unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
- assert(EltSize * 8 == EltVT.getSizeInBits() &&
+ unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
+ assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
"Converting bits to bytes lost precision");
Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl);
@@ -7306,6 +7609,65 @@ SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
return SDValue();
}
+// Convert redundant addressing modes (e.g. scaling is redundant
+// when accessing bytes).
+ISD::MemIndexType
+TargetLowering::getCanonicalIndexType(ISD::MemIndexType IndexType, EVT MemVT,
+ SDValue Offsets) const {
+ bool IsScaledIndex =
+ (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::UNSIGNED_SCALED);
+ bool IsSignedIndex =
+ (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::SIGNED_UNSCALED);
+
+ // Scaling is unimportant for bytes; canonicalize to unscaled.
+ if (IsScaledIndex && MemVT.getScalarType() == MVT::i8) {
+ IsScaledIndex = false;
+ IndexType = IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED;
+ }
+
+ return IndexType;
+}
+
+SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ EVT VT = Op0.getValueType();
+ unsigned Opcode = Node->getOpcode();
+ SDLoc DL(Node);
+
+ // umin(x,y) -> sub(x,usubsat(x,y))
+ if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
+ isOperationLegal(ISD::USUBSAT, VT)) {
+ return DAG.getNode(ISD::SUB, DL, VT, Op0,
+ DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
+ }
+
+ // umax(x,y) -> add(x,usubsat(y,x))
+ if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
+ isOperationLegal(ISD::USUBSAT, VT)) {
+ return DAG.getNode(ISD::ADD, DL, VT, Op0,
+ DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
+ }
+
+ // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
+ ISD::CondCode CC;
+ switch (Opcode) {
+ default: llvm_unreachable("How did we get here?");
+ case ISD::SMAX: CC = ISD::SETGT; break;
+ case ISD::SMIN: CC = ISD::SETLT; break;
+ case ISD::UMAX: CC = ISD::SETUGT; break;
+ case ISD::UMIN: CC = ISD::SETULT; break;
+ }
+
+ // FIXME: Should really try to split the vector in case it's legal on a
+ // subvector.
+ if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
+ return DAG.UnrollVectorOp(Node);
+
+ SDValue Cond = DAG.getSetCC(DL, VT, Op0, Op1, CC);
+ return DAG.getSelect(DL, VT, Cond, Op0, Op1);
+}
+
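The two saturation-based rewrites at the top are duals of the ones in expandAddSubSat below; a brute-force check at 8 bits:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    uint8_t usubsat(uint8_t a, uint8_t b) { return a > b ? uint8_t(a - b) : 0; }

    int main() {
      for (unsigned x = 0; x < 256; ++x)
        for (unsigned y = 0; y < 256; ++y) {
          uint8_t a = uint8_t(x), b = uint8_t(y);
          // umin(x,y) -> sub(x, usubsat(x,y))
          assert(uint8_t(a - usubsat(a, b)) == std::min(a, b));
          // umax(x,y) -> add(x, usubsat(y,x))
          assert(uint8_t(a + usubsat(b, a)) == std::max(a, b));
        }
    }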
SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
unsigned Opcode = Node->getOpcode();
SDValue LHS = Node->getOperand(0);
@@ -7317,12 +7679,13 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
assert(VT.isInteger() && "Expected operands to be integers");
// usub.sat(a, b) -> umax(a, b) - b
- if (Opcode == ISD::USUBSAT && isOperationLegalOrCustom(ISD::UMAX, VT)) {
+ if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
}
- if (Opcode == ISD::UADDSAT && isOperationLegalOrCustom(ISD::UMIN, VT)) {
+ // uadd.sat(a, b) -> umin(a, ~b) + b
+ if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
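Both rewrites can be checked exhaustively at a small width; the uadd.sat form works because a <=u ~b exactly when a + b does not wrap:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned x = 0; x < 256; ++x)
        for (unsigned y = 0; y < 256; ++y) {
          uint8_t a = uint8_t(x), b = uint8_t(y);
          // usub.sat(a, b) -> umax(a, b) - b
          uint8_t mx = a > b ? a : b;
          assert(uint8_t(mx - b) == (a > b ? uint8_t(a - b) : 0));
          // uadd.sat(a, b) -> umin(a, ~b) + b
          uint8_t nb = uint8_t(~b);
          uint8_t mn = a < nb ? a : nb;
          assert(uint8_t(mn + b) == (x + y > 255 ? 255u : x + y));
        }
    }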
@@ -7347,6 +7710,11 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
"addition or subtraction node.");
}
+ // FIXME: Should really try to split the vector in case it's legal on a
+ // subvector.
+ if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
+ return DAG.UnrollVectorOp(Node);
+
unsigned BitWidth = LHS.getScalarValueSizeInBits();
EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT),
@@ -7386,6 +7754,41 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
}
}
+SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
+ unsigned Opcode = Node->getOpcode();
+ bool IsSigned = Opcode == ISD::SSHLSAT;
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ EVT VT = LHS.getValueType();
+ SDLoc dl(Node);
+
+ assert((Node->getOpcode() == ISD::SSHLSAT ||
+ Node->getOpcode() == ISD::USHLSAT) &&
+ "Expected a SHLSAT opcode");
+ assert(VT == RHS.getValueType() && "Expected operands to be the same type");
+ assert(VT.isInteger() && "Expected operands to be integers");
+
+ // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
+
+ unsigned BW = VT.getScalarSizeInBits();
+ SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
+ SDValue Orig =
+ DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
+
+ SDValue SatVal;
+ if (IsSigned) {
+ SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
+ SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
+ SatVal = DAG.getSelectCC(dl, LHS, DAG.getConstant(0, dl, VT),
+ SatMin, SatMax, ISD::SETLT);
+ } else {
+ SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
+ }
+ Result = DAG.getSelectCC(dl, LHS, Orig, SatVal, Result, ISD::SETNE);
+
+ return Result;
+}
+
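The overflow test in the comment works because an in-range shift is exactly invertible: shifting back recovers LHS iff no significant bits were pushed out. A brute-force model of the unsigned case:

    #include <cassert>
    #include <cstdint>

    uint8_t ushlsat(uint8_t x, unsigned s) {
      uint8_t r = uint8_t(x << s);
      // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
      return (uint8_t(r >> s) == x) ? r : 0xFF;
    }

    int main() {
      for (unsigned x = 0; x < 256; ++x)
        for (unsigned s = 0; s < 8; ++s) {
          unsigned wide = x << s; // reference computed at full precision
          assert(ushlsat(uint8_t(x), s) == (wide > 255 ? 255u : wide));
        }
    }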
SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
assert((Node->getOpcode() == ISD::SMULFIX ||
@@ -7759,7 +8162,7 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
if (isSigned) {
// The high part is obtained by SRA'ing all but one of the bits of low
// part.
- unsigned LoSize = VT.getSizeInBits();
+ unsigned LoSize = VT.getFixedSizeInBits();
HiLHS =
DAG.getNode(ISD::SRA, dl, VT, LHS,
DAG.getConstant(LoSize - 1, dl,
@@ -7818,7 +8221,7 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
// Truncate the result if SetCC returns a larger type than needed.
EVT RType = Node->getValueType(1);
- if (RType.getSizeInBits() < Overflow.getValueSizeInBits())
+ if (RType.bitsLT(Overflow.getValueType()))
Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
@@ -7828,32 +8231,14 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
SDLoc dl(Node);
- bool NoNaN = Node->getFlags().hasNoNaNs();
- unsigned BaseOpcode = 0;
- switch (Node->getOpcode()) {
- default: llvm_unreachable("Expected VECREDUCE opcode");
- case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
- case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
- case ISD::VECREDUCE_ADD: BaseOpcode = ISD::ADD; break;
- case ISD::VECREDUCE_MUL: BaseOpcode = ISD::MUL; break;
- case ISD::VECREDUCE_AND: BaseOpcode = ISD::AND; break;
- case ISD::VECREDUCE_OR: BaseOpcode = ISD::OR; break;
- case ISD::VECREDUCE_XOR: BaseOpcode = ISD::XOR; break;
- case ISD::VECREDUCE_SMAX: BaseOpcode = ISD::SMAX; break;
- case ISD::VECREDUCE_SMIN: BaseOpcode = ISD::SMIN; break;
- case ISD::VECREDUCE_UMAX: BaseOpcode = ISD::UMAX; break;
- case ISD::VECREDUCE_UMIN: BaseOpcode = ISD::UMIN; break;
- case ISD::VECREDUCE_FMAX:
- BaseOpcode = NoNaN ? ISD::FMAXNUM : ISD::FMAXIMUM;
- break;
- case ISD::VECREDUCE_FMIN:
- BaseOpcode = NoNaN ? ISD::FMINNUM : ISD::FMINIMUM;
- break;
- }
-
+ unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
SDValue Op = Node->getOperand(0);
EVT VT = Op.getValueType();
+ if (VT.isScalableVector())
+ report_fatal_error(
+ "Expanding reductions for scalable vectors is undefined.");
+
// Try to use a shuffle reduction for power of two vectors.
if (VT.isPow2VectorType()) {
while (VT.getVectorNumElements() > 1) {
@@ -7884,6 +8269,33 @@ SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
return Res;
}
+SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ SDValue AccOp = Node->getOperand(0);
+ SDValue VecOp = Node->getOperand(1);
+ SDNodeFlags Flags = Node->getFlags();
+
+ EVT VT = VecOp.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+
+ if (VT.isScalableVector())
+ report_fatal_error(
+ "Expanding reductions for scalable vectors is undefined.");
+
+ unsigned NumElts = VT.getVectorNumElements();
+
+ SmallVector<SDValue, 8> Ops;
+ DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
+
+ unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
+
+ SDValue Res = AccOp;
+ for (unsigned i = 0; i < NumElts; i++)
+ Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
+
+ return Res;
+}
+
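Unlike expandVecReduce above, this keeps the strict left-to-right evaluation order that the sequential (ordered) reduction opcodes guarantee; for floating point the association visibly matters:

    #include <cassert>

    int main() {
      float v[3] = {1.0f, 1e20f, -1e20f};
      float inOrder = 0.0f; // ((acc + v0) + v1) + v2, as built above
      for (float e : v)
        inOrder += e;
      assert(inOrder == 0.0f); // the 1.0f is absorbed by 1e20f
      // A tree-style regrouping of the same elements gives a different result:
      assert((0.0f + v[0]) + (v[1] + v[2]) == 1.0f);
    }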
bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
SelectionDAG &DAG) const {
EVT VT = Node->getValueType(0);
@@ -7906,3 +8318,105 @@ bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
}
return false;
}
+
+SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
+ SelectionDAG &DAG) const {
+ bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
+ SDLoc dl(SDValue(Node, 0));
+ SDValue Src = Node->getOperand(0);
+
+ // DstVT is the result type, while SatWidth is the width to which we saturate.
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Node->getValueType(0);
+
+ unsigned SatWidth = Node->getConstantOperandVal(1);
+ unsigned DstWidth = DstVT.getScalarSizeInBits();
+ assert(SatWidth <= DstWidth &&
+ "Expected saturation width smaller than result width");
+
+ // Determine minimum and maximum integer values and their corresponding
+ // floating-point values.
+ APInt MinInt, MaxInt;
+ if (IsSigned) {
+ MinInt = APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth);
+ MaxInt = APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth);
+ } else {
+ MinInt = APInt::getMinValue(SatWidth).zextOrSelf(DstWidth);
+ MaxInt = APInt::getMaxValue(SatWidth).zextOrSelf(DstWidth);
+ }
+
+ // We cannot risk emitting FP_TO_XINT nodes with a source VT of f16, as
+ // libcall emission cannot handle this. Large result types will fail.
+ if (SrcVT == MVT::f16) {
+ Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
+ SrcVT = Src.getValueType();
+ }
+
+ APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT));
+ APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT));
+
+ APFloat::opStatus MinStatus =
+ MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
+ APFloat::opStatus MaxStatus =
+ MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
+ bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
+ !(MaxStatus & APFloat::opStatus::opInexact);
+
+ SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
+ SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
+
+ // If the integer bounds are exactly representable as floats and min/max are
+ // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
+ // of comparisons and selects.
+ bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
+ isOperationLegal(ISD::FMAXNUM, SrcVT);
+ if (AreExactFloatBounds && MinMaxLegal) {
+ SDValue Clamped = Src;
+
+ // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
+ Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
+ // Clamp by MaxFloat from above. NaN cannot occur.
+ Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
+ // Convert clamped value to integer.
+ SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
+ dl, DstVT, Clamped);
+
+ // In the unsigned case we're done, because we mapped NaN to MinFloat,
+ // which will cast to zero.
+ if (!IsSigned)
+ return FpToInt;
+
+ // Otherwise, select 0 if Src is NaN.
+ SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
+ return DAG.getSelectCC(dl, Src, Src, ZeroInt, FpToInt,
+ ISD::CondCode::SETUO);
+ }
+
+ SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
+ SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
+
+ // Result of direct conversion. The assumption here is that the operation is
+ // non-trapping and it's fine to apply it to an out-of-range value if we
+ // select it away later.
+ SDValue FpToInt =
+ DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
+
+ SDValue Select = FpToInt;
+
+ // If Src ULT MinFloat, select MinInt. In particular, this also selects
+ // MinInt if Src is NaN.
+ Select = DAG.getSelectCC(dl, Src, MinFloatNode, MinIntNode, Select,
+ ISD::CondCode::SETULT);
+ // If Src OGT MaxFloat, select MaxInt.
+ Select = DAG.getSelectCC(dl, Src, MaxFloatNode, MaxIntNode, Select,
+ ISD::CondCode::SETOGT);
+
+ // In the unsigned case we are done, because we mapped NaN to MinInt, which
+ // is already zero.
+ if (!IsSigned)
+ return Select;
+
+ // Otherwise, select 0 if Src is NaN.
+ SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
+ return DAG.getSelectCC(dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO);
+}
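
The comparison-and-select fallback above has straightforward scalar semantics: clamp to the saturation bounds and map NaN to zero. A minimal sketch for a signed 32-bit result (illustrative C++; the bound constants correspond to MinInt/MaxInt above):

#include <cmath>
#include <cstdint>

static int32_t fpToSIntSat32(double Src) {
  if (std::isnan(Src))
    return 0;                       // final SETUO select
  if (Src < -2147483648.0)
    return INT32_MIN;               // SETULT select against MinFloat
  if (Src > 2147483647.0)
    return INT32_MAX;               // SETOGT select against MaxFloat
  return static_cast<int32_t>(Src); // in-range: plain FP_TO_SINT
}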
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp
index ce43fb1fbd4b..f89069e9f728 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -144,7 +144,7 @@ class ShrinkWrap : public MachineFunctionPass {
unsigned FrameDestroyOpcode;
/// Stack pointer register, used by llvm.{savestack,restorestack}
- unsigned SP;
+ Register SP;
/// Entry block.
const MachineBasicBlock *Entry;
@@ -331,11 +331,7 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
Save = &MBB;
else
Save = MDT->findNearestCommonDominator(Save, &MBB);
-
- if (!Save) {
- LLVM_DEBUG(dbgs() << "Found a block that is not reachable from Entry\n");
- return;
- }
+ assert(Save);
if (!Restore)
Restore = &MBB;
@@ -381,7 +377,7 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
// C. Save and Restore are in the same loop.
bool SaveDominatesRestore = false;
bool RestorePostDominatesSave = false;
- while (Save && Restore &&
+ while (Restore &&
(!(SaveDominatesRestore = MDT->dominates(Save, Restore)) ||
!(RestorePostDominatesSave = MPDT->dominates(Restore, Save)) ||
// Post-dominance is not enough in loops to ensure that all uses/defs
@@ -412,8 +408,7 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
Restore = MPDT->findNearestCommonDominator(Restore, Save);
// Fix (C).
- if (Save && Restore &&
- (MLI->getLoopFor(Save) || MLI->getLoopFor(Restore))) {
+ if (Restore && (MLI->getLoopFor(Save) || MLI->getLoopFor(Restore))) {
if (MLI->getLoopDepth(Save) > MLI->getLoopDepth(Restore)) {
// Push Save outside of this loop if immediate dominator is different
// from save block. If immediate dominator is not different, bail out.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp
index 0683058f177e..d2fd4a6d8fd9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -142,7 +142,7 @@ static void MarkBlocksLiveIn(BasicBlock *BB,
/// instruction with those returned by the personality function.
void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
Value *SelVal) {
- SmallVector<Value *, 8> UseWorkList(LPI->user_begin(), LPI->user_end());
+ SmallVector<Value *, 8> UseWorkList(LPI->users());
while (!UseWorkList.empty()) {
Value *Val = UseWorkList.pop_back_val();
auto *EVI = dyn_cast<ExtractValueInst>(Val);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp
index 36a0ddf67b19..4bb50a285497 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SpillPlacement.cpp
@@ -27,10 +27,7 @@
//===----------------------------------------------------------------------===//
#include "SpillPlacement.h"
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/SparseSet.h"
#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -39,7 +36,6 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/Support/BlockFrequency.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp
index 8dec620536a7..a6a3149ae25b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp
@@ -12,28 +12,18 @@
//===----------------------------------------------------------------------===//
#include "SplitKit.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveIntervalCalc.h"
-#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -41,10 +31,8 @@
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/DebugLoc.h"
-#include "llvm/MC/LaneBitmask.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/BlockFrequency.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -53,7 +41,6 @@
#include <iterator>
#include <limits>
#include <tuple>
-#include <utility>
using namespace llvm;
@@ -181,7 +168,7 @@ void SplitAnalysis::analyzeUses() {
// Get use slots form the use-def chain.
const MachineRegisterInfo &MRI = MF.getRegInfo();
- for (MachineOperand &MO : MRI.use_nodbg_operands(CurLI->reg))
+ for (MachineOperand &MO : MRI.use_nodbg_operands(CurLI->reg()))
if (!MO.isUndef())
UseSlots.push_back(LIS.getInstructionIndex(*MO.getParent()).getRegSlot());
@@ -346,7 +333,7 @@ unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const {
}
bool SplitAnalysis::isOriginalEndpoint(SlotIndex Idx) const {
- unsigned OrigReg = VRM.getOriginal(CurLI->reg);
+ unsigned OrigReg = VRM.getOriginal(CurLI->reg());
const LiveInterval &Orig = LIS.getInterval(OrigReg);
assert(!Orig.empty() && "Splitting empty interval?");
LiveInterval::const_iterator I = Orig.find(Idx);
@@ -412,10 +399,18 @@ LLVM_DUMP_METHOD void SplitEditor::dump() const {
}
#endif
+LiveInterval::SubRange &SplitEditor::getSubRangeForMaskExact(LaneBitmask LM,
+ LiveInterval &LI) {
+ for (LiveInterval::SubRange &S : LI.subranges())
+ if (S.LaneMask == LM)
+ return S;
+ llvm_unreachable("SubRange for this mask not found");
+}
+
LiveInterval::SubRange &SplitEditor::getSubRangeForMask(LaneBitmask LM,
LiveInterval &LI) {
for (LiveInterval::SubRange &S : LI.subranges())
- if (S.LaneMask == LM)
+ if ((S.LaneMask & LM) == LM)
return S;
llvm_unreachable("SubRange for this mask not found");
}
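
The new exact helper keeps the old equality lookup, while getSubRangeForMask is relaxed to accept any subrange whose lane mask is a superset of the query. The superset test in isolation (illustrative C++ over plain integers, not LaneBitmask):

#include <cstdint>

// A subrange mask S covers a query mask LM iff every lane in LM is in S.
static bool coversMask(uint64_t S, uint64_t LM) {
  return (S & LM) == LM; // the exact variant instead requires S == LM
}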
@@ -446,7 +441,7 @@ void SplitEditor::addDeadDef(LiveInterval &LI, VNInfo *VNI, bool Original) {
LaneBitmask LM;
for (const MachineOperand &DefOp : DefMI->defs()) {
Register R = DefOp.getReg();
- if (R != LI.reg)
+ if (R != LI.reg())
continue;
if (unsigned SR = DefOp.getSubReg())
LM |= TRI.getSubRegIndexLaneMask(SR);
@@ -517,7 +512,7 @@ void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo &ParentVNI) {
VFP = ValueForcePair(nullptr, true);
}
-SlotIndex SplitEditor::buildSingleSubRegCopy(unsigned FromReg, unsigned ToReg,
+SlotIndex SplitEditor::buildSingleSubRegCopy(Register FromReg, Register ToReg,
MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
unsigned SubIdx, LiveInterval &DestLI, bool Late, SlotIndex Def) {
const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
@@ -543,7 +538,7 @@ SlotIndex SplitEditor::buildSingleSubRegCopy(unsigned FromReg, unsigned ToReg,
return Def;
}
-SlotIndex SplitEditor::buildCopy(unsigned FromReg, unsigned ToReg,
+SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg,
LaneBitmask LaneMask, MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore, bool Late, unsigned RegIdx) {
const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
@@ -649,7 +644,7 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
LiveInterval &OrigLI = LIS.getInterval(Original);
VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx);
- unsigned Reg = LI->reg;
+ Register Reg = LI->reg();
bool DidRemat = false;
if (OrigVNI) {
LiveRangeEdit::Remat RM(ParentVNI);
@@ -662,16 +657,25 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
}
if (!DidRemat) {
LaneBitmask LaneMask;
- if (LI->hasSubRanges()) {
+ if (OrigLI.hasSubRanges()) {
LaneMask = LaneBitmask::getNone();
- for (LiveInterval::SubRange &S : LI->subranges())
- LaneMask |= S.LaneMask;
+ for (LiveInterval::SubRange &S : OrigLI.subranges()) {
+ if (S.liveAt(UseIdx))
+ LaneMask |= S.LaneMask;
+ }
} else {
LaneMask = LaneBitmask::getAll();
}
- ++NumCopies;
- Def = buildCopy(Edit->getReg(), Reg, LaneMask, MBB, I, Late, RegIdx);
+ if (LaneMask.none()) {
+ const MCInstrDesc &Desc = TII.get(TargetOpcode::IMPLICIT_DEF);
+ MachineInstr *ImplicitDef = BuildMI(MBB, I, DebugLoc(), Desc, Reg);
+ SlotIndexes &Indexes = *LIS.getSlotIndexes();
+ Def = Indexes.insertMachineInstrInMaps(*ImplicitDef, Late).getRegSlot();
+ } else {
+ ++NumCopies;
+ Def = buildCopy(Edit->getReg(), Reg, LaneMask, MBB, I, Late, RegIdx);
+ }
}
// Define the value in Reg.
@@ -994,9 +998,7 @@ void SplitEditor::computeRedundantBackCopies(
}
if (!DominatedVNIs.empty()) {
forceRecompute(0, *ParentVNI);
- for (auto VNI : DominatedVNIs) {
- BackCopies.push_back(VNI);
- }
+ append_range(BackCopies, DominatedVNIs);
DominatedVNIs.clear();
}
}
@@ -1257,8 +1259,8 @@ void SplitEditor::extendPHIRange(MachineBasicBlock &B, LiveIntervalCalc &LIC,
LiveInterval &PLI = Edit->getParent();
// Need the cast because the inputs to ?: would otherwise be deemed
// "incompatible": SubRange vs LiveInterval.
- LiveRange &PSR = !LM.all() ? getSubRangeForMask(LM, PLI)
- : static_cast<LiveRange&>(PLI);
+ LiveRange &PSR = !LM.all() ? getSubRangeForMaskExact(LM, PLI)
+ : static_cast<LiveRange &>(PLI);
if (PSR.liveAt(LastUse))
LIC.extend(LR, End, /*PhysReg=*/0, Undefs);
}
@@ -1293,7 +1295,7 @@ void SplitEditor::extendPHIKillRanges() {
continue;
unsigned RegIdx = RegAssign.lookup(V->def);
LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx));
- LiveInterval::SubRange &S = getSubRangeForMask(PS.LaneMask, LI);
+ LiveInterval::SubRange &S = getSubRangeForMaskExact(PS.LaneMask, LI);
if (removeDeadSegment(V->def, S))
continue;
@@ -1342,7 +1344,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
// Rewrite to the mapped register at Idx.
unsigned RegIdx = RegAssign.lookup(Idx);
LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx));
- MO.setReg(LI.reg);
+ MO.setReg(LI.reg());
LLVM_DEBUG(dbgs() << " rewr " << printMBBReference(*MI->getParent())
<< '\t' << Idx << ':' << RegIdx << '\t' << *MI);
@@ -1402,7 +1404,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
}
}
- for (unsigned R : *Edit) {
+ for (Register R : *Edit) {
LiveInterval &LI = LIS.getInterval(R);
if (!LI.hasSubRanges())
continue;
@@ -1424,7 +1426,7 @@ void SplitEditor::deleteRematVictims() {
continue;
MachineInstr *MI = LIS.getInstructionFromIndex(S.valno->def);
assert(MI && "Missing instruction for dead def");
- MI->addRegisterDead(LI->reg, &TRI);
+ MI->addRegisterDead(LI->reg(), &TRI);
if (!MI->allDefsAreDead())
continue;
@@ -1521,7 +1523,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
deleteRematVictims();
// Get rid of unused values and set phi-kill flags.
- for (unsigned Reg : *Edit) {
+ for (Register Reg : *Edit) {
LiveInterval &LI = LIS.getInterval(Reg);
LI.removeEmptySubRanges();
LI.RenumberValues();
@@ -1538,13 +1540,13 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
ConnectedVNInfoEqClasses ConEQ(LIS);
for (unsigned i = 0, e = Edit->size(); i != e; ++i) {
// Don't use iterators, they are invalidated by create() below.
- unsigned VReg = Edit->get(i);
+ Register VReg = Edit->get(i);
LiveInterval &LI = LIS.getInterval(VReg);
SmallVector<LiveInterval*, 8> SplitLIs;
LIS.splitSeparateComponents(LI, SplitLIs);
- unsigned Original = VRM.getOriginal(VReg);
+ Register Original = VRM.getOriginal(VReg);
for (LiveInterval *SplitLI : SplitLIs)
- VRM.setIsSplitFromReg(SplitLI->reg, Original);
+ VRM.setIsSplitFromReg(SplitLI->reg(), Original);
// The new intervals all map back to i.
if (LRMap)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h
index 3ab5f2585f34..a94518f5a4fc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h
@@ -345,10 +345,17 @@ private:
return LICalc[SpillMode != SM_Partition && RegIdx != 0];
}
- /// Find a subrange corresponding to the lane mask @p LM in the live
+ /// Find a subrange corresponding to the exact lane mask @p LM in the live
/// interval @p LI. The interval @p LI is assumed to contain such a subrange.
/// This function is used to find corresponding subranges between the
/// original interval and the new intervals.
+ LiveInterval::SubRange &getSubRangeForMaskExact(LaneBitmask LM,
+ LiveInterval &LI);
+
+ /// Find a subrange corresponding to the lane mask @p LM, or a superset of it,
+ /// in the live interval @p LI. The interval @p LI is assumed to contain such
+ /// a subrange. This function is used to find corresponding subranges between
+ /// the original interval and the new intervals.
LiveInterval::SubRange &getSubRangeForMask(LaneBitmask LM, LiveInterval &LI);
/// Add a segment to the interval LI for the value number VNI. If LI has
@@ -432,11 +439,11 @@ private:
/// Add a copy instruction copying \p FromReg to \p ToReg before
/// \p InsertBefore. This can be invoked with a \p LaneMask which may make it
/// necessary to construct a sequence of copies to cover it exactly.
- SlotIndex buildCopy(unsigned FromReg, unsigned ToReg, LaneBitmask LaneMask,
+ SlotIndex buildCopy(Register FromReg, Register ToReg, LaneBitmask LaneMask,
MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
bool Late, unsigned RegIdx);
- SlotIndex buildSingleSubRegCopy(unsigned FromReg, unsigned ToReg,
+ SlotIndex buildSingleSubRegCopy(Register FromReg, Register ToReg,
MachineBasicBlock &MB, MachineBasicBlock::iterator InsertBefore,
unsigned SubIdx, LiveInterval &DestLI, bool Late, SlotIndex Def);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp
index d720d93c306d..af58204f6db5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp
@@ -373,6 +373,36 @@ STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
// before visiting the memcpy block (which will contain the lifetime start
// for "b" then it will appear that 'b' has a degenerate lifetime.
//
+// Handle Windows Exception with LifetimeStartOnFirstUse:
+// -----------------
+//
+// There was a bug when using LifetimeStartOnFirstUse on win32:
+// class Type1 {
+// ...
+// ~Type1(){ write memory;}
+// }
+// ...
+// try{
+// Type1 V
+// ...
+// } catch (Type2 X){
+// ...
+// }
+// For a variable X in catch(X), we add the slot pX = &(&X) to
+// ConservativeSlots to prevent using LifetimeStartOnFirstUse, because pX
+// may be merged with object V, whose destructor may run after pX has been
+// implicitly written. All of this happens inside the C++ EH runtime libs
+// (through CxxThrowException), so it cannot be observed at the IR level.
+//
+// The load of pX, which has no corresponding store in the IR, is usually
+// the first LOAD MI in the EH pad, e.g.:
+// bb.x.catch.i (landing-pad, ehfunclet-entry):
+// ; predecessors: %bb...
+// successors: %bb...
+// %n:gr32 = MOV32rm %stack.pX ...
+// ...
+// The Type2** %stack.pX is only ever written by the EH runtime libs, so we
+// check StoreSlots to screen it out.
namespace {
@@ -434,6 +464,9 @@ class StackColoring : public MachineFunctionPass {
/// slots lifetime-start-on-first-use is disabled).
BitVector ConservativeSlots;
+ /// Record the FI slots referenced by a 'may write to memory'.
+ BitVector StoreSlots;
+
/// Number of iterations taken during data flow analysis.
unsigned NumIterations;
@@ -629,10 +662,13 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
InterestingSlots.resize(NumSlot);
ConservativeSlots.clear();
ConservativeSlots.resize(NumSlot);
+ StoreSlots.clear();
+ StoreSlots.resize(NumSlot);
// number of start and end lifetime ops for each slot
SmallVector<int, 8> NumStartLifetimes(NumSlot, 0);
SmallVector<int, 8> NumEndLifetimes(NumSlot, 0);
+ SmallVector<int, 8> NumLoadInCatchPad(NumSlot, 0);
// Step 1: collect markers and populate the "InterestingSlots"
// and "ConservativeSlots" sets.
@@ -687,6 +723,13 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
if (! BetweenStartEnd.test(Slot)) {
ConservativeSlots.set(Slot);
}
+ // Here we record stores into StoreSlots so that catch points can be
+ // screened out later. For more information, please refer to "Handle
+ // Windows Exception with LifetimeStartOnFirstUse" at the head of this
+ // file.
+ if (MI.mayStore())
+ StoreSlots.set(Slot);
+ if (MF->getWinEHFuncInfo() && MBB->isEHPad() && MI.mayLoad())
+ NumLoadInCatchPad[Slot] += 1;
}
}
}
@@ -697,11 +740,14 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
return 0;
}
- // PR27903: slots with multiple start or end lifetime ops are not
+ // 1) PR27903: slots with multiple start or end lifetime ops are not
// safe to enable for "lifetime-start-on-first-use".
- for (unsigned slot = 0; slot < NumSlot; ++slot)
- if (NumStartLifetimes[slot] > 1 || NumEndLifetimes[slot] > 1)
+ // 2) It is also not safe for a variable X in catch(X) on Windows.
+ for (unsigned slot = 0; slot < NumSlot; ++slot) {
+ if (NumStartLifetimes[slot] > 1 || NumEndLifetimes[slot] > 1 ||
+ (NumLoadInCatchPad[slot] > 1 && !StoreSlots.test(slot)))
ConservativeSlots.set(slot);
+ }
LLVM_DEBUG(dumpBV("Conservative slots", ConservativeSlots));
// Step 2: compute begin/end sets for each block
@@ -1048,7 +1094,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
if (MMO->getAAInfo()) {
if (const Value *MMOV = MMO->getValue()) {
SmallVector<Value *, 4> Objs;
- getUnderlyingObjectsForCodeGen(MMOV, Objs, MF->getDataLayout());
+ getUnderlyingObjectsForCodeGen(MMOV, Objs);
if (Objs.empty())
MayHaveConflictingAAMD = true;
@@ -1241,7 +1287,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
// This is a simple greedy algorithm for merging allocas. First, sort the
// slots, placing the largest slots first. Next, perform an n^2 scan and look
- // for disjoint slots. When you find disjoint slots, merge the samller one
+ // for disjoint slots. When you find disjoint slots, merge the smaller one
// into the bigger one and update the live interval. Remove the small alloca
// and continue.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp
index 1e060ecbeb43..faf07e90c39c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp
@@ -45,6 +45,14 @@ static cl::opt<int> StackMapVersion(
const char *StackMaps::WSMP = "Stack Maps: ";
+static uint64_t getConstMetaVal(const MachineInstr &MI, unsigned Idx) {
+ assert(MI.getOperand(Idx).isImm() &&
+ MI.getOperand(Idx).getImm() == StackMaps::ConstantOp);
+ const auto &MO = MI.getOperand(Idx + 1);
+ assert(MO.isImm());
+ return MO.getImm();
+}
+
StackMapOpers::StackMapOpers(const MachineInstr *MI)
: MI(MI) {
assert(getVarIdx() <= MI->getNumOperands() &&
@@ -83,11 +91,89 @@ unsigned PatchPointOpers::getNextScratchIdx(unsigned StartIdx) const {
return ScratchIdx;
}
+unsigned StatepointOpers::getNumGcMapEntriesIdx() {
+ // Take the index of the number of allocas and skip all alloca records.
+ unsigned CurIdx = getNumAllocaIdx();
+ unsigned NumAllocas = getConstMetaVal(*MI, CurIdx - 1);
+ CurIdx++;
+ while (NumAllocas--)
+ CurIdx = StackMaps::getNextMetaArgIdx(MI, CurIdx);
+ return CurIdx + 1; // skip <StackMaps::ConstantOp>
+}
+
+unsigned StatepointOpers::getNumAllocaIdx() {
+ // Take the index of the number of gc ptrs and skip all gc ptr records.
+ unsigned CurIdx = getNumGCPtrIdx();
+ unsigned NumGCPtrs = getConstMetaVal(*MI, CurIdx - 1);
+ CurIdx++;
+ while (NumGCPtrs--)
+ CurIdx = StackMaps::getNextMetaArgIdx(MI, CurIdx);
+ return CurIdx + 1; // skip <StackMaps::ConstantOp>
+}
+
+unsigned StatepointOpers::getNumGCPtrIdx() {
+ // Take the index of the number of deopt args and skip all deopt records.
+ unsigned CurIdx = getNumDeoptArgsIdx();
+ unsigned NumDeoptArgs = getConstMetaVal(*MI, CurIdx - 1);
+ CurIdx++;
+ while (NumDeoptArgs--) {
+ CurIdx = StackMaps::getNextMetaArgIdx(MI, CurIdx);
+ }
+ return CurIdx + 1; // skip <StackMaps::ConstantOp>
+}
+
+int StatepointOpers::getFirstGCPtrIdx() {
+ unsigned NumGCPtrsIdx = getNumGCPtrIdx();
+ unsigned NumGCPtrs = getConstMetaVal(*MI, NumGCPtrsIdx - 1);
+ if (NumGCPtrs == 0)
+ return -1;
+ ++NumGCPtrsIdx; // skip <num gc ptrs>
+ assert(NumGCPtrsIdx < MI->getNumOperands());
+ return (int)NumGCPtrsIdx;
+}
+
+unsigned StatepointOpers::getGCPointerMap(
+ SmallVectorImpl<std::pair<unsigned, unsigned>> &GCMap) {
+ unsigned CurIdx = getNumGcMapEntriesIdx();
+ unsigned GCMapSize = getConstMetaVal(*MI, CurIdx - 1);
+ CurIdx++;
+ for (unsigned N = 0; N < GCMapSize; ++N) {
+ unsigned B = MI->getOperand(CurIdx++).getImm();
+ unsigned D = MI->getOperand(CurIdx++).getImm();
+ GCMap.push_back(std::make_pair(B, D));
+ }
+
+ return GCMapSize;
+}
+
StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) {
if (StackMapVersion != 3)
llvm_unreachable("Unsupported stackmap version!");
}
+unsigned StackMaps::getNextMetaArgIdx(const MachineInstr *MI, unsigned CurIdx) {
+ assert(CurIdx < MI->getNumOperands() && "Bad meta arg index");
+ const auto &MO = MI->getOperand(CurIdx);
+ if (MO.isImm()) {
+ switch (MO.getImm()) {
+ default:
+ llvm_unreachable("Unrecognized operand type.");
+ case StackMaps::DirectMemRefOp:
+ CurIdx += 2;
+ break;
+ case StackMaps::IndirectMemRefOp:
+ CurIdx += 3;
+ break;
+ case StackMaps::ConstantOp:
+ ++CurIdx;
+ break;
+ }
+ }
+ ++CurIdx;
+ assert(CurIdx < MI->getNumOperands() && "points past operand list");
+ return CurIdx;
+}
+
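
getNextMetaArgIdx advances by the size of one operand group: a DirectMemRefOp tag is followed by two payload operands, IndirectMemRefOp by three, ConstantOp by one, and an untagged operand stands alone. A minimal sketch over a flat integer stream (illustrative; the tag values here are placeholders, not StackMaps' enum values):

#include <cstddef>
#include <cstdint>
#include <vector>

enum SketchTag : int64_t { TagDirect = -1, TagIndirect = -2, TagConstant = -3 };

static size_t nextMetaArg(const std::vector<int64_t> &Ops, size_t Idx) {
  switch (Ops[Idx]) {
  case TagDirect:   return Idx + 3; // tag + <Reg, Imm>
  case TagIndirect: return Idx + 4; // tag + <Size, Reg, Imm>
  case TagConstant: return Idx + 2; // tag + <Imm>
  default:          return Idx + 1; // plain register or immediate
  }
}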
/// Go up the super-register chain until we hit a valid dwarf register number.
static unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) {
int RegNum = TRI->getDwarfRegNum(Reg, false);
@@ -148,6 +234,12 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
if (MOI->isImplicit())
return ++MOI;
+ if (MOI->isUndef()) {
+ // Record an undef register as a constant, using the same value ISel uses.
+ Locs.emplace_back(Location::Constant, sizeof(int64_t), 0, 0xFEFEFEFE);
+ return ++MOI;
+ }
+
assert(Register::isPhysicalRegister(MOI->getReg()) &&
"Virtreg operands should have been rewritten before now.");
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(MOI->getReg());
@@ -286,14 +378,82 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {
}
}
- LiveOuts.erase(
- llvm::remove_if(LiveOuts,
- [](const LiveOutReg &LO) { return LO.Reg == 0; }),
- LiveOuts.end());
+ llvm::erase_if(LiveOuts, [](const LiveOutReg &LO) { return LO.Reg == 0; });
return LiveOuts;
}
+// See statepoint MI format description in StatepointOpers' class comment
+// in include/llvm/CodeGen/StackMaps.h
+void StackMaps::parseStatepointOpers(const MachineInstr &MI,
+ MachineInstr::const_mop_iterator MOI,
+ MachineInstr::const_mop_iterator MOE,
+ LocationVec &Locations,
+ LiveOutVec &LiveOuts) {
+ LLVM_DEBUG(dbgs() << "record statepoint : " << MI << "\n");
+ StatepointOpers SO(&MI);
+ MOI = parseOperand(MOI, MOE, Locations, LiveOuts); // CC
+ MOI = parseOperand(MOI, MOE, Locations, LiveOuts); // Flags
+ MOI = parseOperand(MOI, MOE, Locations, LiveOuts); // Num Deopts
+
+ // Record Deopt Args.
+ unsigned NumDeoptArgs = Locations.back().Offset;
+ assert(Locations.back().Type == Location::Constant);
+ assert(NumDeoptArgs == SO.getNumDeoptArgs());
+
+ while (NumDeoptArgs--)
+ MOI = parseOperand(MOI, MOE, Locations, LiveOuts);
+
+ // Record gc base/derived pairs
+ assert(MOI->isImm() && MOI->getImm() == StackMaps::ConstantOp);
+ ++MOI;
+ assert(MOI->isImm());
+ unsigned NumGCPointers = MOI->getImm();
+ ++MOI;
+ if (NumGCPointers) {
+ // Map logical index of GC ptr to MI operand index.
+ SmallVector<unsigned, 8> GCPtrIndices;
+ unsigned GCPtrIdx = (unsigned)SO.getFirstGCPtrIdx();
+ assert((int)GCPtrIdx != -1);
+ assert(MOI - MI.operands_begin() == GCPtrIdx + 0LL);
+ while (NumGCPointers--) {
+ GCPtrIndices.push_back(GCPtrIdx);
+ GCPtrIdx = StackMaps::getNextMetaArgIdx(&MI, GCPtrIdx);
+ }
+
+ SmallVector<std::pair<unsigned, unsigned>, 8> GCPairs;
+ unsigned NumGCPairs = SO.getGCPointerMap(GCPairs);
+ (void)NumGCPairs;
+ LLVM_DEBUG(dbgs() << "NumGCPairs = " << NumGCPairs << "\n");
+
+ auto MOB = MI.operands_begin();
+ for (auto &P : GCPairs) {
+ assert(P.first < GCPtrIndices.size() && "base pointer index not found");
+ assert(P.second < GCPtrIndices.size() &&
+ "derived pointer index not found");
+ unsigned BaseIdx = GCPtrIndices[P.first];
+ unsigned DerivedIdx = GCPtrIndices[P.second];
+ LLVM_DEBUG(dbgs() << "Base : " << BaseIdx << " Derived : " << DerivedIdx
+ << "\n");
+ (void)parseOperand(MOB + BaseIdx, MOE, Locations, LiveOuts);
+ (void)parseOperand(MOB + DerivedIdx, MOE, Locations, LiveOuts);
+ }
+
+ MOI = MOB + GCPtrIdx;
+ }
+
+ // Record gc allocas
+ assert(MOI < MOE);
+ assert(MOI->isImm() && MOI->getImm() == StackMaps::ConstantOp);
+ ++MOI;
+ unsigned NumAllocas = MOI->getImm();
+ ++MOI;
+ while (NumAllocas--) {
+ MOI = parseOperand(MOI, MOE, Locations, LiveOuts);
+ assert(MOI < MOE);
+ }
+}
+
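
The base/derived handling above first maps each logical GC-pointer number to its machine-operand index, then records every (base, derived) pair from the GC pointer map side by side. The same two-step resolution, stripped down (illustrative C++, our names):

#include <cstddef>
#include <utility>
#include <vector>

static void recordGCPairs(const std::vector<size_t> &GCPtrIndices,
                          const std::vector<std::pair<size_t, size_t>> &GCPairs,
                          std::vector<size_t> &RecordedOperands) {
  for (const auto &P : GCPairs) {
    RecordedOperands.push_back(GCPtrIndices[P.first]);  // base operand index
    RecordedOperands.push_back(GCPtrIndices[P.second]); // derived operand index
  }
}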
void StackMaps::recordStackMapOpers(const MCSymbol &MILabel,
const MachineInstr &MI, uint64_t ID,
MachineInstr::const_mop_iterator MOI,
@@ -311,9 +471,11 @@ void StackMaps::recordStackMapOpers(const MCSymbol &MILabel,
}
// Parse operands.
- while (MOI != MOE) {
- MOI = parseOperand(MOI, MOE, Locations, LiveOuts);
- }
+ if (MI.getOpcode() == TargetOpcode::STATEPOINT)
+ parseStatepointOpers(MI, MOI, MOE, Locations, LiveOuts);
+ else
+ while (MOI != MOE)
+ MOI = parseOperand(MOI, MOE, Locations, LiveOuts);
// Move large constants into the constant pool.
for (auto &Loc : Locations) {
@@ -394,8 +556,6 @@ void StackMaps::recordStatepoint(const MCSymbol &L, const MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::STATEPOINT && "expected statepoint");
StatepointOpers opers(&MI);
- // Record all the deopt and gc operands (they're contiguous and run from the
- // initial index to the end of the operand list)
const unsigned StartIdx = opers.getVarIdx();
recordStackMapOpers(L, MI, opers.getID(), MI.operands_begin() + StartIdx,
MI.operands_end(), false);
@@ -404,7 +564,7 @@ void StackMaps::recordStatepoint(const MCSymbol &L, const MachineInstr &MI) {
/// Emit the stackmap header.
///
/// Header {
-/// uint8 : Stack Map Version (currently 2)
+/// uint8 : Stack Map Version (currently 3)
/// uint8 : Reserved (expected to be 0)
/// uint16 : Reserved (expected to be 0)
/// }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp
index a343791807e6..0411faabbcc3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp
@@ -170,7 +170,8 @@ bool StackProtector::HasAddressTaken(const Instruction *AI,
// If this instruction accesses memory make sure it doesn't access beyond
// the bounds of the allocated object.
Optional<MemoryLocation> MemLoc = MemoryLocation::getOrNone(I);
- if (MemLoc.hasValue() && MemLoc->Size.getValue() > AllocSize)
+ if (MemLoc.hasValue() && MemLoc->Size.hasValue() &&
+ MemLoc->Size.getValue() > AllocSize)
return true;
switch (I->getOpcode()) {
case Instruction::Store:
@@ -251,10 +252,9 @@ bool StackProtector::HasAddressTaken(const Instruction *AI,
static const CallInst *findStackProtectorIntrinsic(Function &F) {
for (const BasicBlock &BB : F)
for (const Instruction &I : BB)
- if (const CallInst *CI = dyn_cast<CallInst>(&I))
- if (CI->getCalledFunction() ==
- Intrinsic::getDeclaration(F.getParent(), Intrinsic::stackprotector))
- return CI;
+ if (const auto *II = dyn_cast<IntrinsicInst>(&I))
+ if (II->getIntrinsicID() == Intrinsic::stackprotector)
+ return II;
return nullptr;
}
@@ -274,7 +274,6 @@ static const CallInst *findStackProtectorIntrinsic(Function &F) {
bool StackProtector::RequiresStackProtector() {
bool Strong = false;
bool NeedsProtector = false;
- HasPrologue = findStackProtectorIntrinsic(*F);
if (F->hasFnAttribute(Attribute::SafeStack))
return false;
@@ -295,8 +294,6 @@ bool StackProtector::RequiresStackProtector() {
Strong = true; // Use the same heuristic as strong to determine SSPLayout
} else if (F->hasFnAttribute(Attribute::StackProtectStrong))
Strong = true;
- else if (HasPrologue)
- NeedsProtector = true;
else if (!F->hasFnAttribute(Attribute::StackProtect))
return false;
@@ -381,7 +378,10 @@ bool StackProtector::RequiresStackProtector() {
static Value *getStackGuard(const TargetLoweringBase *TLI, Module *M,
IRBuilder<> &B,
bool *SupportsSelectionDAGSP = nullptr) {
- if (Value *Guard = TLI->getIRStackGuard(B))
+ Value *Guard = TLI->getIRStackGuard(B);
+ auto GuardMode = TLI->getTargetMachine().Options.StackProtectorGuard;
+ if ((GuardMode == llvm::StackProtectorGuards::TLS ||
+ GuardMode == llvm::StackProtectorGuards::None) && Guard)
return B.CreateLoad(B.getInt8PtrTy(), Guard, true, "StackGuard");
// Use SelectionDAG SSP handling, since there isn't an IR guard.
@@ -556,7 +556,9 @@ BasicBlock *StackProtector::CreateFailBB() {
LLVMContext &Context = F->getContext();
BasicBlock *FailBB = BasicBlock::Create(Context, "CallStackCheckFailBlk", F);
IRBuilder<> B(FailBB);
- B.SetCurrentDebugLocation(DebugLoc::get(0, 0, F->getSubprogram()));
+ if (F->getSubprogram())
+ B.SetCurrentDebugLocation(
+ DILocation::get(Context, 0, 0, F->getSubprogram()));
if (Trip.isOSOpenBSD()) {
FunctionCallee StackChkFail = M->getOrInsertFunction(
"__stack_smash_handler", Type::getVoidTy(Context),
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
index 3cc5d30ebad7..a6f8974f3343 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -145,7 +145,7 @@ namespace {
// their weight.
struct IntervalSorter {
bool operator()(LiveInterval* LHS, LiveInterval* RHS) const {
- return LHS->weight > RHS->weight;
+ return LHS->weight() > RHS->weight();
}
};
@@ -174,7 +174,8 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {
continue;
LiveInterval &li = LS->getInterval(FI);
if (!MI.isDebugValue())
- li.weight += LiveIntervals::getSpillWeight(false, true, MBFI, MI);
+ li.incrementWeight(
+ LiveIntervals::getSpillWeight(false, true, MBFI, MI));
}
for (MachineInstr::mmo_iterator MMOI = MI.memoperands_begin(),
EE = MI.memoperands_end();
@@ -222,7 +223,7 @@ void StackSlotColoring::InitializeSlots() {
for (auto *I : Intervals) {
LiveInterval &li = I->second;
LLVM_DEBUG(li.dump());
- int FI = Register::stackSlot2Index(li.reg);
+ int FI = Register::stackSlot2Index(li.reg());
if (MFI->isDeadObjectIndex(FI))
continue;
@@ -269,7 +270,7 @@ StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const {
int StackSlotColoring::ColorSlot(LiveInterval *li) {
int Color = -1;
bool Share = false;
- int FI = Register::stackSlot2Index(li->reg);
+ int FI = Register::stackSlot2Index(li->reg());
uint8_t StackID = MFI->getStackID(FI);
if (!DisableSharing) {
@@ -331,12 +332,12 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
bool Changed = false;
for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
LiveInterval *li = SSIntervals[i];
- int SS = Register::stackSlot2Index(li->reg);
+ int SS = Register::stackSlot2Index(li->reg());
int NewSS = ColorSlot(li);
assert(NewSS >= 0 && "Stack coloring failed?");
SlotMapping[SS] = NewSS;
RevMap[NewSS].push_back(SS);
- SlotWeights[NewSS] += li->weight;
+ SlotWeights[NewSS] += li->weight();
UsedColors.set(NewSS);
Changed |= (SS != NewSS);
}
@@ -344,8 +345,8 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "\nSpill slots after coloring:\n");
for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
LiveInterval *li = SSIntervals[i];
- int SS = Register::stackSlot2Index(li->reg);
- li->weight = SlotWeights[SS];
+ int SS = Register::stackSlot2Index(li->reg());
+ li->setWeight(SlotWeights[SS]);
}
// Sort them by new weight.
llvm::stable_sort(SSIntervals, IntervalSorter());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp
index dd0b9d4c2e48..4408011c95c0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SwiftErrorValueTracking.cpp
@@ -202,8 +202,8 @@ void SwiftErrorValueTracking::propagateVRegs() {
// downward defs.
bool needPHI =
VRegs.size() >= 1 &&
- std::find_if(
- VRegs.begin(), VRegs.end(),
+ llvm::find_if(
+ VRegs,
[&](const std::pair<const MachineBasicBlock *, Register> &V)
-> bool { return V.second != VRegs[0].second; }) !=
VRegs.end();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
index 078c9691f8dc..dfcec32d9537 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
@@ -11,8 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/SwitchLoweringUtils.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
index f9773f74a7bd..575bf555c489 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -720,8 +720,7 @@ bool TailDuplicator::duplicateSimpleBB(
SmallVectorImpl<MachineInstr *> &Copies) {
SmallPtrSet<MachineBasicBlock *, 8> Succs(TailBB->succ_begin(),
TailBB->succ_end());
- SmallVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(),
- TailBB->pred_end());
+ SmallVector<MachineBasicBlock *, 8> Preds(TailBB->predecessors());
bool Changed = false;
for (MachineBasicBlock *PredBB : Preds) {
if (PredBB->hasEHPadSuccessor() || PredBB->mayHaveInlineAsmBr())
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index f8b482c04a58..b0594ec086b2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -41,9 +41,9 @@ bool TargetFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const
/// frame of the specified index, along with the frame register used
/// (in output arg FrameReg). This is the default implementation which
/// is overridden for some targets.
-int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF,
- int FI,
- Register &FrameReg) const {
+StackOffset
+TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+ Register &FrameReg) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
@@ -52,8 +52,9 @@ int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF,
// something different.
FrameReg = RI->getFrameRegister(MF);
- return MFI.getObjectOffset(FI) + MFI.getStackSize() -
- getOffsetOfLocalArea() + MFI.getOffsetAdjustment();
+ return StackOffset::getFixed(MFI.getObjectOffset(FI) + MFI.getStackSize() -
+ getOffsetOfLocalArea() +
+ MFI.getOffsetAdjustment());
}
bool TargetFrameLowering::needsFrameIndexResolution(
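
The signature change replaces a plain int with StackOffset so targets with scalable stack objects can return a two-part offset; the default implementation only ever produces the fixed part. A minimal model of the type (illustrative, not the LLVM class):

#include <cstdint>

struct StackOffsetSketch {
  int64_t Fixed = 0;    // bytes known at compile time
  int64_t Scalable = 0; // bytes scaled by the runtime vector length
  static StackOffsetSketch getFixed(int64_t F) { return {F, 0}; }
};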
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 24f3f96d0b1d..165860ef1aa8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -69,6 +69,15 @@ void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
llvm_unreachable("Target didn't implement insertNoop!");
}
+/// insertNoops - Insert noops into the instruction stream at the specified
+/// point.
+void TargetInstrInfo::insertNoops(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned Quantity) const {
+ for (unsigned i = 0; i < Quantity; ++i)
+ insertNoop(MBB, MI);
+}
+
static bool isAsmComment(const char *Str, const MCAsmInfo &MAI) {
return strncmp(Str, MAI.getCommentString().data(),
MAI.getCommentString().size()) == 0;
@@ -471,6 +480,7 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
ArrayRef<unsigned> Ops, int FrameIndex,
const TargetInstrInfo &TII) {
unsigned StartIdx = 0;
+ unsigned NumDefs = 0;
switch (MI.getOpcode()) {
case TargetOpcode::STACKMAP: {
// StackMapLiveValues are foldable
@@ -486,16 +496,25 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
case TargetOpcode::STATEPOINT: {
// For statepoints, fold deopt and gc arguments, but not call arguments.
StartIdx = StatepointOpers(&MI).getVarIdx();
+ NumDefs = MI.getNumDefs();
break;
}
default:
llvm_unreachable("unexpected stackmap opcode");
}
+ unsigned DefToFoldIdx = MI.getNumOperands();
+
// Return false if any operands requested for folding are not foldable (not
// part of the stackmap's live values).
for (unsigned Op : Ops) {
- if (Op < StartIdx)
+ if (Op < NumDefs) {
+ assert(DefToFoldIdx == MI.getNumOperands() && "Folding multiple defs");
+ DefToFoldIdx = Op;
+ } else if (Op < StartIdx) {
+ return nullptr;
+ }
+ if (MI.getOperand(Op).isTied())
return nullptr;
}
@@ -505,11 +524,16 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
// No need to fold return, the meta data, and function arguments
for (unsigned i = 0; i < StartIdx; ++i)
- MIB.add(MI.getOperand(i));
+ if (i != DefToFoldIdx)
+ MIB.add(MI.getOperand(i));
- for (unsigned i = StartIdx; i < MI.getNumOperands(); ++i) {
+ for (unsigned i = StartIdx, e = MI.getNumOperands(); i < e; ++i) {
MachineOperand &MO = MI.getOperand(i);
+ unsigned TiedTo = e;
+ (void)MI.isRegTiedToDefOperand(i, &TiedTo);
+
if (is_contained(Ops, i)) {
+ assert(TiedTo == e && "Cannot fold tied operands");
unsigned SpillSize;
unsigned SpillOffset;
// Compute the spill slot size and offset.
@@ -523,9 +547,15 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
MIB.addImm(SpillSize);
MIB.addFrameIndex(FrameIndex);
MIB.addImm(SpillOffset);
- }
- else
+ } else {
MIB.add(MO);
+ if (TiedTo < e) {
+ assert(TiedTo < NumDefs && "Bad tied operand");
+ if (TiedTo > DefToFoldIdx)
+ --TiedTo;
+ NewMI->tieOperands(TiedTo, NewMI->getNumOperands() - 1);
+ }
+ }
}
return NewMI;
}
@@ -748,8 +778,8 @@ bool TargetInstrInfo::isReassociationCandidate(const MachineInstr &Inst,
// instruction is known to not increase the critical path, then don't match
// that pattern.
bool TargetInstrInfo::getMachineCombinerPatterns(
- MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
+ MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
+ bool DoRegPressureReduce) const {
bool Commute;
if (isReassociationCandidate(Root, Commute)) {
// We found a sequence of instructions that may be suitable for a
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 42c1fa8af0e6..28c8bd0a7ded 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -135,23 +135,28 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
setLibcallCallingConv((RTLIB::Libcall)LC, CallingConv::C);
// For IEEE quad-precision libcall names, PPC uses "kf" instead of "tf".
- if (TT.getArch() == Triple::ppc || TT.isPPC64()) {
+ if (TT.isPPC()) {
setLibcallName(RTLIB::ADD_F128, "__addkf3");
setLibcallName(RTLIB::SUB_F128, "__subkf3");
setLibcallName(RTLIB::MUL_F128, "__mulkf3");
setLibcallName(RTLIB::DIV_F128, "__divkf3");
+ setLibcallName(RTLIB::POWI_F128, "__powikf2");
setLibcallName(RTLIB::FPEXT_F32_F128, "__extendsfkf2");
setLibcallName(RTLIB::FPEXT_F64_F128, "__extenddfkf2");
setLibcallName(RTLIB::FPROUND_F128_F32, "__trunckfsf2");
setLibcallName(RTLIB::FPROUND_F128_F64, "__trunckfdf2");
setLibcallName(RTLIB::FPTOSINT_F128_I32, "__fixkfsi");
setLibcallName(RTLIB::FPTOSINT_F128_I64, "__fixkfdi");
+ setLibcallName(RTLIB::FPTOSINT_F128_I128, "__fixkfti");
setLibcallName(RTLIB::FPTOUINT_F128_I32, "__fixunskfsi");
setLibcallName(RTLIB::FPTOUINT_F128_I64, "__fixunskfdi");
+ setLibcallName(RTLIB::FPTOUINT_F128_I128, "__fixunskfti");
setLibcallName(RTLIB::SINTTOFP_I32_F128, "__floatsikf");
setLibcallName(RTLIB::SINTTOFP_I64_F128, "__floatdikf");
+ setLibcallName(RTLIB::SINTTOFP_I128_F128, "__floattikf");
setLibcallName(RTLIB::UINTTOFP_I32_F128, "__floatunsikf");
setLibcallName(RTLIB::UINTTOFP_I64_F128, "__floatundikf");
+ setLibcallName(RTLIB::UINTTOFP_I128_F128, "__floatuntikf");
setLibcallName(RTLIB::OEQ_F128, "__eqkf2");
setLibcallName(RTLIB::UNE_F128, "__nekf2");
setLibcallName(RTLIB::OGE_F128, "__gekf2");
@@ -224,6 +229,10 @@ RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
if (OpVT == MVT::f16) {
if (RetVT == MVT::f32)
return FPEXT_F16_F32;
+ if (RetVT == MVT::f64)
+ return FPEXT_F16_F64;
+ if (RetVT == MVT::f128)
+ return FPEXT_F16_F128;
} else if (OpVT == MVT::f32) {
if (RetVT == MVT::f64)
return FPEXT_F32_F64;
@@ -285,7 +294,14 @@ RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {
/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) {
- if (OpVT == MVT::f32) {
+ if (OpVT == MVT::f16) {
+ if (RetVT == MVT::i32)
+ return FPTOSINT_F16_I32;
+ if (RetVT == MVT::i64)
+ return FPTOSINT_F16_I64;
+ if (RetVT == MVT::i128)
+ return FPTOSINT_F16_I128;
+ } else if (OpVT == MVT::f32) {
if (RetVT == MVT::i32)
return FPTOSINT_F32_I32;
if (RetVT == MVT::i64)
@@ -327,7 +343,14 @@ RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) {
/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) {
- if (OpVT == MVT::f32) {
+ if (OpVT == MVT::f16) {
+ if (RetVT == MVT::i32)
+ return FPTOUINT_F16_I32;
+ if (RetVT == MVT::i64)
+ return FPTOUINT_F16_I64;
+ if (RetVT == MVT::i128)
+ return FPTOUINT_F16_I128;
+ } else if (OpVT == MVT::f32) {
if (RetVT == MVT::i32)
return FPTOUINT_F32_I32;
if (RetVT == MVT::i64)
@@ -370,6 +393,8 @@ RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) {
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) {
if (OpVT == MVT::i32) {
+ if (RetVT == MVT::f16)
+ return SINTTOFP_I32_F16;
if (RetVT == MVT::f32)
return SINTTOFP_I32_F32;
if (RetVT == MVT::f64)
@@ -381,6 +406,8 @@ RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) {
if (RetVT == MVT::ppcf128)
return SINTTOFP_I32_PPCF128;
} else if (OpVT == MVT::i64) {
+ if (RetVT == MVT::f16)
+ return SINTTOFP_I64_F16;
if (RetVT == MVT::f32)
return SINTTOFP_I64_F32;
if (RetVT == MVT::f64)
@@ -392,6 +419,8 @@ RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) {
if (RetVT == MVT::ppcf128)
return SINTTOFP_I64_PPCF128;
} else if (OpVT == MVT::i128) {
+ if (RetVT == MVT::f16)
+ return SINTTOFP_I128_F16;
if (RetVT == MVT::f32)
return SINTTOFP_I128_F32;
if (RetVT == MVT::f64)
@@ -410,6 +439,8 @@ RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) {
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
if (OpVT == MVT::i32) {
+ if (RetVT == MVT::f16)
+ return UINTTOFP_I32_F16;
if (RetVT == MVT::f32)
return UINTTOFP_I32_F32;
if (RetVT == MVT::f64)
@@ -421,6 +452,8 @@ RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
if (RetVT == MVT::ppcf128)
return UINTTOFP_I32_PPCF128;
} else if (OpVT == MVT::i64) {
+ if (RetVT == MVT::f16)
+ return UINTTOFP_I64_F16;
if (RetVT == MVT::f32)
return UINTTOFP_I64_F32;
if (RetVT == MVT::f64)
@@ -432,6 +465,8 @@ RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
if (RetVT == MVT::ppcf128)
return UINTTOFP_I64_PPCF128;
} else if (OpVT == MVT::i128) {
+ if (RetVT == MVT::f16)
+ return UINTTOFP_I128_F16;
if (RetVT == MVT::f32)
return UINTTOFP_I128_F32;
if (RetVT == MVT::f64)
@@ -446,6 +481,83 @@ RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
return UNKNOWN_LIBCALL;
}
+RTLIB::Libcall RTLIB::getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order,
+ MVT VT) {
+ unsigned ModeN, ModelN;
+ switch (VT.SimpleTy) {
+ case MVT::i8:
+ ModeN = 0;
+ break;
+ case MVT::i16:
+ ModeN = 1;
+ break;
+ case MVT::i32:
+ ModeN = 2;
+ break;
+ case MVT::i64:
+ ModeN = 3;
+ break;
+ case MVT::i128:
+ ModeN = 4;
+ break;
+ default:
+ return UNKNOWN_LIBCALL;
+ }
+
+ switch (Order) {
+ case AtomicOrdering::Monotonic:
+ ModelN = 0;
+ break;
+ case AtomicOrdering::Acquire:
+ ModelN = 1;
+ break;
+ case AtomicOrdering::Release:
+ ModelN = 2;
+ break;
+ case AtomicOrdering::AcquireRelease:
+ case AtomicOrdering::SequentiallyConsistent:
+ ModelN = 3;
+ break;
+ default:
+ return UNKNOWN_LIBCALL;
+ }
+
+#define LCALLS(A, B) \
+ { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL }
+#define LCALL5(A) \
+ LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
+ switch (Opc) {
+ case ISD::ATOMIC_CMP_SWAP: {
+ const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_CAS)};
+ return LC[ModeN][ModelN];
+ }
+ case ISD::ATOMIC_SWAP: {
+ const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_SWP)};
+ return LC[ModeN][ModelN];
+ }
+ case ISD::ATOMIC_LOAD_ADD: {
+ const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDADD)};
+ return LC[ModeN][ModelN];
+ }
+ case ISD::ATOMIC_LOAD_OR: {
+ const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDSET)};
+ return LC[ModeN][ModelN];
+ }
+ case ISD::ATOMIC_LOAD_CLR: {
+ const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDCLR)};
+ return LC[ModeN][ModelN];
+ }
+ case ISD::ATOMIC_LOAD_XOR: {
+ const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDEOR)};
+ return LC[ModeN][ModelN];
+ }
+ default:
+ return UNKNOWN_LIBCALL;
+ }
+#undef LCALLS
+#undef LCALL5
+}
+
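
getOUTLINE_ATOMIC is a pure table lookup: the value type picks one of five size rows and the ordering picks one of four columns. The resulting libcalls follow the AArch64 outline-atomics naming convention; reconstructing a name from the two indices looks like this (a sketch under that naming assumption, not taken from this patch):

#include <cstdio>
#include <string>

static std::string outlineAtomicName(const char *Op, unsigned ModeN,
                                     unsigned ModelN) {
  static const unsigned Bytes[5] = {1, 2, 4, 8, 16};                // i8..i128
  static const char *Order[4] = {"relax", "acq", "rel", "acq_rel"}; // ModelN
  char Buf[64];
  std::snprintf(Buf, sizeof(Buf), "__aarch64_%s%u_%s", Op, Bytes[ModeN],
                Order[ModelN]);
  return Buf;
}
// e.g. outlineAtomicName("ldadd", 2, 1) would yield "__aarch64_ldadd4_acq"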
RTLIB::Libcall RTLIB::getSYNC(unsigned Opc, MVT VT) {
#define OP_TO_LIBCALL(Name, Enum) \
case Name: \
@@ -615,7 +727,7 @@ void TargetLoweringBase::initActions() {
std::end(TargetDAGCombineArray), 0);
for (MVT VT : MVT::fp_valuetypes()) {
- MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits().getFixedSize());
+ MVT IntVT = MVT::getIntegerVT(VT.getFixedSizeInBits());
if (IntVT.isValid()) {
setOperationAction(ISD::ATOMIC_SWAP, VT, Promote);
AddPromotedToType(ISD::ATOMIC_SWAP, VT, IntVT);
@@ -657,6 +769,8 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::UADDSAT, VT, Expand);
setOperationAction(ISD::SSUBSAT, VT, Expand);
setOperationAction(ISD::USUBSAT, VT, Expand);
+ setOperationAction(ISD::SSHLSAT, VT, Expand);
+ setOperationAction(ISD::USHLSAT, VT, Expand);
setOperationAction(ISD::SMULFIX, VT, Expand);
setOperationAction(ISD::SMULFIXSAT, VT, Expand);
setOperationAction(ISD::UMULFIX, VT, Expand);
@@ -665,6 +779,8 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::SDIVFIXSAT, VT, Expand);
setOperationAction(ISD::UDIVFIX, VT, Expand);
setOperationAction(ISD::UDIVFIXSAT, VT, Expand);
+ setOperationAction(ISD::FP_TO_SINT_SAT, VT, Expand);
+ setOperationAction(ISD::FP_TO_UINT_SAT, VT, Expand);
// Overflow operations default to expand
setOperationAction(ISD::SADDO, VT, Expand);
@@ -678,6 +794,8 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::ADDCARRY, VT, Expand);
setOperationAction(ISD::SUBCARRY, VT, Expand);
setOperationAction(ISD::SETCCCARRY, VT, Expand);
+ setOperationAction(ISD::SADDO_CARRY, VT, Expand);
+ setOperationAction(ISD::SSUBO_CARRY, VT, Expand);
// ADDC/ADDE/SUBC/SUBE default to expand.
setOperationAction(ISD::ADDC, VT, Expand);
@@ -690,6 +808,7 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::BITREVERSE, VT, Expand);
+ setOperationAction(ISD::PARITY, VT, Expand);
// These library functions default to expand.
setOperationAction(ISD::FROUND, VT, Expand);
@@ -728,6 +847,8 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::VECREDUCE_UMIN, VT, Expand);
setOperationAction(ISD::VECREDUCE_FMAX, VT, Expand);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Expand);
+ setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Expand);
+ setOperationAction(ISD::VECREDUCE_SEQ_FMUL, VT, Expand);
}
// Most targets ignore the @llvm.prefetch intrinsic.
@@ -772,6 +893,8 @@ void TargetLoweringBase::initActions() {
// On most systems, DEBUGTRAP and TRAP have no difference. The "Expand"
// here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP.
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
+
+ setOperationAction(ISD::UBSANTRAP, MVT::Other, Expand);
}
MVT TargetLoweringBase::getScalarShiftAmountTy(const DataLayout &DL,
@@ -801,6 +924,11 @@ bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const {
}
}
+bool TargetLoweringBase::isFreeAddrSpaceCast(unsigned SrcAS,
+ unsigned DestAS) const {
+ return TM.isNoopAddrSpaceCast(SrcAS, DestAS);
+}
+
void TargetLoweringBase::setJumpIsExpensive(bool isExpensive) {
// If the command-line option was specified, ignore this request.
if (!JumpIsExpensiveOverride.getNumOccurrences())
@@ -823,9 +951,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
"Promote may not follow Expand or Promote");
if (LA == TypeSplitVector)
- return LegalizeKind(LA,
- EVT::getVectorVT(Context, SVT.getVectorElementType(),
- SVT.getVectorElementCount() / 2));
+ return LegalizeKind(LA, EVT(SVT).getHalfNumVectorElementsVT(Context));
if (LA == TypeScalarizeVector)
return LegalizeKind(LA, SVT.getVectorElementType());
return LegalizeKind(LA, NVT);
@@ -856,10 +982,10 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
EVT EltVT = VT.getVectorElementType();
// Vectors with only one element are always scalarized.
- if (NumElts == 1)
+ if (NumElts.isScalar())
return LegalizeKind(TypeScalarizeVector, EltVT);
- if (VT.getVectorElementCount() == ElementCount(1, true))
+ if (VT.getVectorElementCount() == ElementCount::getScalable(1))
report_fatal_error("Cannot legalize this vector");
// Try to widen vector elements until the element type is a power of two and
@@ -869,7 +995,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
// Vectors with a number of elements that is not a power of two are always
// widened, for example <3 x i8> -> <4 x i8>.
if (!VT.isPow2VectorType()) {
- NumElts = NumElts.NextPowerOf2();
+ NumElts = NumElts.coefficientNextPowerOf2();
EVT NVT = EVT::getVectorVT(Context, EltVT, NumElts);
return LegalizeKind(TypeWidenVector, NVT);
}
@@ -881,7 +1007,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
// <4 x i140> -> <2 x i140>
if (LK.first == TypeExpandInteger)
return LegalizeKind(TypeSplitVector,
- EVT::getVectorVT(Context, EltVT, NumElts / 2));
+ VT.getHalfNumVectorElementsVT(Context));
// Promote the integer element types until a legal vector type is found
// or until the element integer type is too big. If a legal type was not
@@ -918,7 +1044,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
// If there is no wider legal type, split the vector.
while (true) {
// Round up to the next power of 2.
- NumElts = NumElts.NextPowerOf2();
+ NumElts = NumElts.coefficientNextPowerOf2();
// If there is no simple vector type with this many elements then there
// cannot be a larger legal vector type. Note that this assumes that
@@ -941,7 +1067,8 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
}
// Vectors with illegal element types are expanded.
- EVT NVT = EVT::getVectorVT(Context, EltVT, VT.getVectorElementCount() / 2);
+ EVT NVT = EVT::getVectorVT(Context, EltVT,
+ VT.getVectorElementCount().divideCoefficientBy(2));
return LegalizeKind(TypeSplitVector, NVT);
}
@@ -957,23 +1084,24 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
// Scalable vectors cannot be scalarized, so splitting or widening is
// required.
- if (VT.isScalableVector() && !isPowerOf2_32(EC.Min))
+ if (VT.isScalableVector() && !isPowerOf2_32(EC.getKnownMinValue()))
llvm_unreachable(
"Splitting or widening of non-power-of-2 MVTs is not implemented.");
// FIXME: We don't support non-power-of-2-sized vectors for now.
// Ideally we could break down into LHS/RHS like LegalizeDAG does.
- if (!isPowerOf2_32(EC.Min)) {
+ if (!isPowerOf2_32(EC.getKnownMinValue())) {
// Split EC to unit size (scalable property is preserved).
- NumVectorRegs = EC.Min;
- EC = EC / NumVectorRegs;
+ NumVectorRegs = EC.getKnownMinValue();
+ EC = ElementCount::getFixed(1);
}
// Divide the input until we get to a supported size. This will
// always end up with an EC that represents a scalar or a scalable
// scalar.
- while (EC.Min > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, EC))) {
- EC.Min >>= 1;
+ while (EC.getKnownMinValue() > 1 &&
+ !TLI->isTypeLegal(MVT::getVectorVT(EltTy, EC))) {
+ EC = EC.divideCoefficientBy(2);
NumVectorRegs <<= 1;
}
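
The loop above maintains the invariant that the element count times the register count equals the original element count: each halving of EC doubles NumVectorRegs. A standalone sketch with the legality query stubbed out (isLegalElementCount is a made-up stand-in for TLI->isTypeLegal):

#include <cassert>

// Assumption for illustration: the target handles at most 4 elements.
static bool isLegalElementCount(unsigned EC) { return EC <= 4; }

int main() {
  unsigned EC = 16, NumVectorRegs = 1;
  const unsigned OrigEC = EC;
  while (EC > 1 && !isLegalElementCount(EC)) {
    EC /= 2;             // EC = EC.divideCoefficientBy(2)
    NumVectorRegs <<= 1; // one more doubling of the register count
  }
  assert(EC * NumVectorRegs == OrigEC && "the parts cover the input exactly");
  return 0;
}
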
@@ -984,7 +1112,7 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
NewVT = EltTy;
IntermediateVT = NewVT;
- unsigned LaneSizeInBits = NewVT.getScalarSizeInBits().getFixedSize();
+ unsigned LaneSizeInBits = NewVT.getScalarSizeInBits();
// Convert sizes such as i33 to i64.
if (!isPowerOf2_32(LaneSizeInBits))
@@ -993,8 +1121,7 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
MVT DestVT = TLI->getRegisterType(NewVT);
RegisterVT = DestVT;
if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
- return NumVectorRegs *
- (LaneSizeInBits / DestVT.getScalarSizeInBits().getFixedSize());
+ return NumVectorRegs * (LaneSizeInBits / DestVT.getScalarSizeInBits());
// Otherwise, promotion or legal types use the same number of registers as
// the vector decimated to the appropriate level.
@@ -1041,9 +1168,19 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
// Inherit previous memory operands.
MIB.cloneMemRefs(*MI);
- for (auto &MO : MI->operands()) {
+ for (unsigned i = 0; i < MI->getNumOperands(); ++i) {
+ MachineOperand &MO = MI->getOperand(i);
if (!MO.isFI()) {
+      // Index of the Def operand this Use is tied to.
+      // Since Defs come before Uses, if a Use is tied, then the
+      // index of the Def must be smaller than the index of that Use.
+      // Also, Defs preserve their position in the new MI.
+ unsigned TiedTo = i;
+ if (MO.isReg() && MO.isTied())
+ TiedTo = MI->findTiedOperandIdx(i);
MIB.add(MO);
+ if (TiedTo < i)
+ MIB->tieOperands(TiedTo, MIB->getNumOperands() - 1);
continue;
}
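
To illustrate the re-tying logic above with a toy model (illustrative names, not the MachineInstr API): defs precede uses and operands are copied in order, so a def's index never changes, and a tied use can safely be re-tied to that same index after being appended:

#include <cassert>
#include <vector>

int main() {
  // Tie[i] == i means operand i is untied; otherwise Tie[i] is the index of
  // the def that operand i (a use) is tied to, which is always smaller.
  std::vector<unsigned> OldTie = {0, 1, 0, 3}; // use #2 tied to def #0
  std::vector<unsigned> NewTie;
  for (unsigned i = 0; i < OldTie.size(); ++i) {
    unsigned TiedTo = OldTie[i];
    NewTie.push_back(i);      // append the copied operand, untied for now
    if (TiedTo < i)
      NewTie.back() = TiedTo; // re-tie it to the def's unchanged index
  }
  assert(NewTie[2] == 0 && "use kept its tie to def #0");
  return 0;
}
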
@@ -1090,36 +1227,6 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
return MBB;
}
-MachineBasicBlock *
-TargetLoweringBase::emitXRayCustomEvent(MachineInstr &MI,
- MachineBasicBlock *MBB) const {
- assert(MI.getOpcode() == TargetOpcode::PATCHABLE_EVENT_CALL &&
- "Called emitXRayCustomEvent on the wrong MI!");
- auto &MF = *MI.getMF();
- auto MIB = BuildMI(MF, MI.getDebugLoc(), MI.getDesc());
- for (unsigned OpIdx = 0; OpIdx != MI.getNumOperands(); ++OpIdx)
- MIB.add(MI.getOperand(OpIdx));
-
- MBB->insert(MachineBasicBlock::iterator(MI), MIB);
- MI.eraseFromParent();
- return MBB;
-}
-
-MachineBasicBlock *
-TargetLoweringBase::emitXRayTypedEvent(MachineInstr &MI,
- MachineBasicBlock *MBB) const {
- assert(MI.getOpcode() == TargetOpcode::PATCHABLE_TYPED_EVENT_CALL &&
- "Called emitXRayTypedEvent on the wrong MI!");
- auto &MF = *MI.getMF();
- auto MIB = BuildMI(MF, MI.getDebugLoc(), MI.getDesc());
- for (unsigned OpIdx = 0; OpIdx != MI.getNumOperands(); ++OpIdx)
- MIB.add(MI.getOperand(OpIdx));
-
- MBB->insert(MachineBasicBlock::iterator(MI), MIB);
- MI.eraseFromParent();
- return MBB;
-}
-
/// findRepresentativeClass - Return the largest legal super-reg register class
/// of the register class for the specified type and its associated "cost".
// This function is in TargetLowering because it uses RegClassForVT which would
@@ -1282,7 +1389,7 @@ void TargetLoweringBase::computeRegisterProperties(
MVT SVT = (MVT::SimpleValueType) nVT;
// Promote vectors of integers to vectors with the same number
// of elements, with a wider element type.
- if (SVT.getScalarSizeInBits() > EltVT.getSizeInBits() &&
+ if (SVT.getScalarSizeInBits() > EltVT.getFixedSizeInBits() &&
SVT.getVectorElementCount() == EC && isTypeLegal(SVT)) {
TransformToType[i] = SVT;
RegisterTypeForVT[i] = SVT;
@@ -1298,13 +1405,15 @@ void TargetLoweringBase::computeRegisterProperties(
}
case TypeWidenVector:
- if (isPowerOf2_32(EC.Min)) {
+ if (isPowerOf2_32(EC.getKnownMinValue())) {
// Try to widen the vector.
for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
MVT SVT = (MVT::SimpleValueType) nVT;
if (SVT.getVectorElementType() == EltVT &&
SVT.isScalableVector() == IsScalable &&
- SVT.getVectorElementCount().Min > EC.Min && isTypeLegal(SVT)) {
+ SVT.getVectorElementCount().getKnownMinValue() >
+ EC.getKnownMinValue() &&
+ isTypeLegal(SVT)) {
TransformToType[i] = SVT;
RegisterTypeForVT[i] = SVT;
NumRegistersForVT[i] = 1;
@@ -1348,10 +1457,10 @@ void TargetLoweringBase::computeRegisterProperties(
ValueTypeActions.setTypeAction(VT, TypeScalarizeVector);
else if (PreferredAction == TypeSplitVector)
ValueTypeActions.setTypeAction(VT, TypeSplitVector);
- else if (EC.Min > 1)
+ else if (EC.getKnownMinValue() > 1)
ValueTypeActions.setTypeAction(VT, TypeSplitVector);
else
- ValueTypeActions.setTypeAction(VT, EC.Scalable
+ ValueTypeActions.setTypeAction(VT, EC.isScalable()
? TypeScalarizeScalableVector
: TypeScalarizeVector);
} else {
@@ -1409,7 +1518,8 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
// This handles things like <2 x float> -> <4 x float> and
// <4 x i1> -> <4 x i32>.
LegalizeTypeAction TA = getTypeAction(Context, VT);
- if (EltCnt.Min != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) {
+ if (EltCnt.getKnownMinValue() != 1 &&
+ (TA == TypeWidenVector || TA == TypePromoteInteger)) {
EVT RegisterEVT = getTypeToTransformTo(Context, VT);
if (isTypeLegal(RegisterEVT)) {
IntermediateVT = RegisterEVT;
@@ -1426,7 +1536,7 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
// Scalable vectors cannot be scalarized, so handle the legalisation of the
// types like done elsewhere in SelectionDAG.
- if (VT.isScalableVector() && !isPowerOf2_32(EltCnt.Min)) {
+ if (VT.isScalableVector() && !isPowerOf2_32(EltCnt.getKnownMinValue())) {
LegalizeKind LK;
EVT PartVT = VT;
do {
@@ -1435,15 +1545,15 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
PartVT = LK.second;
} while (LK.first != TypeLegal);
- NumIntermediates =
- VT.getVectorElementCount().Min / PartVT.getVectorElementCount().Min;
+ NumIntermediates = VT.getVectorElementCount().getKnownMinValue() /
+ PartVT.getVectorElementCount().getKnownMinValue();
// FIXME: This code needs to be extended to handle more complex vector
// breakdowns, like nxv7i64 -> nxv8i64 -> 4 x nxv2i64. Currently the only
// supported cases are vectors that are broken down into equal parts
// such as nxv6i64 -> 3 x nxv2i64.
- assert(NumIntermediates * PartVT.getVectorElementCount().Min ==
- VT.getVectorElementCount().Min &&
+ assert((PartVT.getVectorElementCount() * NumIntermediates) ==
+ VT.getVectorElementCount() &&
"Expected an integer multiple of PartVT");
IntermediateVT = PartVT;
RegisterVT = getRegisterType(Context, IntermediateVT);
@@ -1452,16 +1562,16 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
// FIXME: We don't support non-power-of-2-sized vectors for now. Ideally
// we could break down into LHS/RHS like LegalizeDAG does.
- if (!isPowerOf2_32(EltCnt.Min)) {
- NumVectorRegs = EltCnt.Min;
- EltCnt.Min = 1;
+ if (!isPowerOf2_32(EltCnt.getKnownMinValue())) {
+ NumVectorRegs = EltCnt.getKnownMinValue();
+ EltCnt = ElementCount::getFixed(1);
}
// Divide the input until we get to a supported size. This will always
// end with a scalar if the target doesn't support vectors.
- while (EltCnt.Min > 1 &&
+ while (EltCnt.getKnownMinValue() > 1 &&
!isTypeLegal(EVT::getVectorVT(Context, EltTy, EltCnt))) {
- EltCnt.Min >>= 1;
+ EltCnt = EltCnt.divideCoefficientBy(2);
NumVectorRegs <<= 1;
}
@@ -1479,7 +1589,7 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
TypeSize NewVTSize = NewVT.getSizeInBits();
// Convert sizes such as i33 to i64.
if (!isPowerOf2_32(NewVTSize.getKnownMinSize()))
- NewVTSize = NewVTSize.NextPowerOf2();
+ NewVTSize = NewVTSize.coefficientNextPowerOf2();
return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
}
@@ -1616,6 +1726,14 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
MMO.getFlags(), Fast);
}
+bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
+ const DataLayout &DL, LLT Ty,
+ const MachineMemOperand &MMO,
+ bool *Fast) const {
+ return allowsMemoryAccess(Context, DL, getMVTForLLT(Ty), MMO.getAddrSpace(),
+ MMO.getAlign(), MMO.getFlags(), Fast);
+}
+
BranchProbability TargetLoweringBase::getPredictableBranchThreshold() const {
return BranchProbability(MinPercentageForPredictableBranch, 100);
}
@@ -1838,10 +1956,14 @@ Value *TargetLoweringBase::getIRStackGuard(IRBuilder<> &IRB) const {
// Currently only support "standard" __stack_chk_guard.
// TODO: add LOAD_STACK_GUARD support.
void TargetLoweringBase::insertSSPDeclarations(Module &M) const {
- if (!M.getNamedValue("__stack_chk_guard"))
- new GlobalVariable(M, Type::getInt8PtrTy(M.getContext()), false,
- GlobalVariable::ExternalLinkage,
- nullptr, "__stack_chk_guard");
+ if (!M.getNamedValue("__stack_chk_guard")) {
+ auto *GV = new GlobalVariable(M, Type::getInt8PtrTy(M.getContext()), false,
+ GlobalVariable::ExternalLinkage, nullptr,
+ "__stack_chk_guard");
+ if (TM.getRelocationModel() == Reloc::Static &&
+ !TM.getTargetTriple().isWindowsGNUEnvironment())
+ GV->setDSOLocal(true);
+ }
}
// Currently only support "standard" __stack_chk_guard.
@@ -1925,7 +2047,7 @@ static bool parseRefinementStep(StringRef In, size_t &Position,
// step parameter.
if (RefStepString.size() == 1) {
char RefStepChar = RefStepString[0];
- if (RefStepChar >= '0' && RefStepChar <= '9') {
+ if (isDigit(RefStepChar)) {
Value = RefStepChar - '0';
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 27bebe503ce6..fe64b38cf0be 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -21,6 +21,7 @@
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/CodeGen/BasicBlockSectionUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -39,6 +40,7 @@
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PseudoProbe.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -104,10 +106,14 @@ static void GetObjCImageInfo(Module &M, unsigned &Version, unsigned &Flags,
// ELF
//===----------------------------------------------------------------------===//
+TargetLoweringObjectFileELF::TargetLoweringObjectFileELF()
+ : TargetLoweringObjectFile() {
+ SupportDSOLocalEquivalentLowering = true;
+}
+
void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
const TargetMachine &TgtM) {
TargetLoweringObjectFile::Initialize(Ctx, TgtM);
- TM = &TgtM;
CodeModel::Model CM = TgtM.getCodeModel();
InitializeELF(TgtM.Options.UseInitArray);
@@ -122,6 +128,7 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
// Fallthrough if not using EHABI
LLVM_FALLTHROUGH;
case Triple::ppc:
+ case Triple::ppcle:
case Triple::x86:
PersonalityEncoding = isPositionIndependent()
? dwarf::DW_EH_PE_indirect |
@@ -174,11 +181,20 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
// will be in memory. Most of these could end up >2GB away so even a signed
// pc-relative 32-bit address is insufficient, theoretically.
if (isPositionIndependent()) {
- PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata8;
- LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8;
- TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata8;
+ // ILP32 uses sdata4 instead of sdata8
+ if (TgtM.getTargetTriple().getEnvironment() == Triple::GNUILP32) {
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ } else {
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata8;
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata8;
+ }
} else {
PersonalityEncoding = dwarf::DW_EH_PE_absptr;
LSDAEncoding = dwarf::DW_EH_PE_absptr;
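
The EH pointer encodings above are single bytes composed from a data format plus modifier bits. A quick composition check, using the constant values from the DWARF EH spec (mirrored in llvm/BinaryFormat/Dwarf.h):

#include <cstdint>

enum : uint8_t {
  DW_EH_PE_sdata4 = 0x0b,   // signed 4-byte data
  DW_EH_PE_pcrel = 0x10,    // PC-relative
  DW_EH_PE_indirect = 0x80, // value is the address of the pointer
};

// The GNUILP32 personality encoding selected above: 0x80 | 0x10 | 0x0b.
constexpr uint8_t ILP32Personality =
    DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
static_assert(ILP32Personality == 0x9b, "composition check");
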
@@ -310,6 +326,29 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
}
}
+ if (NamedMDNode *FuncInfo = M.getNamedMetadata(PseudoProbeDescMetadataName)) {
+ // Emit a descriptor for every function including functions that have an
+ // available external linkage. We may not want this for imported functions
+    // that have code in another thinLTO module but we don't have a good way to
+ // tell them apart from inline functions defined in header files. Therefore
+ // we put each descriptor in a separate comdat section and rely on the
+ // linker to deduplicate.
+ for (const auto *Operand : FuncInfo->operands()) {
+ const auto *MD = cast<MDNode>(Operand);
+ auto *GUID = mdconst::dyn_extract<ConstantInt>(MD->getOperand(0));
+ auto *Hash = mdconst::dyn_extract<ConstantInt>(MD->getOperand(1));
+ auto *Name = cast<MDString>(MD->getOperand(2));
+ auto *S = C.getObjectFileInfo()->getPseudoProbeDescSection(
+ TM->getFunctionSections() ? Name->getString() : StringRef());
+
+ Streamer.SwitchSection(S);
+ Streamer.emitInt64(GUID->getZExtValue());
+ Streamer.emitInt64(Hash->getZExtValue());
+ Streamer.emitULEB128IntValue(Name->getString().size());
+ Streamer.emitBytes(Name->getString());
+ }
+ }
+
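
Each descriptor record emitted above is therefore a 64-bit GUID, a 64-bit hash, a ULEB128-encoded name length, and the name bytes. A standalone sketch of that layout into a plain byte buffer (integer byte order follows the target; little-endian is assumed here for concreteness):

#include <cstdint>
#include <string>
#include <vector>

static void writeULEB128(std::vector<uint8_t> &Buf, uint64_t V) {
  do {
    uint8_t B = V & 0x7f;
    V >>= 7;
    if (V)
      B |= 0x80; // high bit set: more bytes follow
    Buf.push_back(B);
  } while (V);
}

static void writeInt64(std::vector<uint8_t> &Buf, uint64_t V) {
  for (int I = 0; I < 8; ++I)
    Buf.push_back(uint8_t(V >> (8 * I)));
}

static void writeDesc(std::vector<uint8_t> &Buf, uint64_t GUID, uint64_t Hash,
                      const std::string &Name) {
  writeInt64(Buf, GUID);
  writeInt64(Buf, Hash);
  writeULEB128(Buf, Name.size());
  Buf.insert(Buf.end(), Name.begin(), Name.end());
}
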
unsigned Version = 0;
unsigned Flags = 0;
StringRef Section;
@@ -324,46 +363,7 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
Streamer.AddBlankLine();
}
- SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
- M.getModuleFlagsMetadata(ModuleFlags);
-
- MDNode *CFGProfile = nullptr;
-
- for (const auto &MFE : ModuleFlags) {
- StringRef Key = MFE.Key->getString();
- if (Key == "CG Profile") {
- CFGProfile = cast<MDNode>(MFE.Val);
- break;
- }
- }
-
- if (!CFGProfile)
- return;
-
- auto GetSym = [this](const MDOperand &MDO) -> MCSymbol * {
- if (!MDO)
- return nullptr;
- auto V = cast<ValueAsMetadata>(MDO);
- const Function *F = cast<Function>(V->getValue());
- return TM->getSymbol(F);
- };
-
- for (const auto &Edge : CFGProfile->operands()) {
- MDNode *E = cast<MDNode>(Edge);
- const MCSymbol *From = GetSym(E->getOperand(0));
- const MCSymbol *To = GetSym(E->getOperand(1));
- // Skip null functions. This can happen if functions are dead stripped after
- // the CGProfile pass has been run.
- if (!From || !To)
- continue;
- uint64_t Count = cast<ConstantAsMetadata>(E->getOperand(2))
- ->getValue()
- ->getUniqueInteger()
- .getZExtValue();
- Streamer.emitCGProfileEntry(
- MCSymbolRefExpr::create(From, MCSymbolRefExpr::VK_None, C),
- MCSymbolRefExpr::create(To, MCSymbolRefExpr::VK_None, C), Count);
- }
+ emitCGProfileMetadata(Streamer, M);
}
MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol(
@@ -436,7 +436,8 @@ static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) {
if (Name == getInstrProfSectionName(IPSK_covmap, Triple::ELF,
/*AddSegmentInfo=*/false) ||
Name == getInstrProfSectionName(IPSK_covfun, Triple::ELF,
- /*AddSegmentInfo=*/false))
+ /*AddSegmentInfo=*/false) ||
+ Name == ".llvmbc" || Name == ".llvmcmd")
return SectionKind::getMetadata();
if (Name.empty() || Name[0] != '.') return K;
@@ -614,7 +615,7 @@ getELFSectionNameForGlobal(const GlobalObject *GO, SectionKind Kind,
bool HasPrefix = false;
if (const auto *F = dyn_cast<Function>(GO)) {
if (Optional<StringRef> Prefix = F->getSectionPrefix()) {
- Name += *Prefix;
+ raw_svector_ostream(Name) << '.' << *Prefix;
HasPrefix = true;
}
}
@@ -680,11 +681,12 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
// MD_associated in a unique section.
unsigned UniqueID = MCContext::GenericSectionID;
const MCSymbolELF *LinkedToSym = getLinkedToSymbol(GO, TM);
- if (LinkedToSym) {
+ if (GO->getMetadata(LLVMContext::MD_associated)) {
UniqueID = NextUniqueID++;
Flags |= ELF::SHF_LINK_ORDER;
} else {
- if (getContext().getAsmInfo()->useIntegratedAssembler()) {
+ if (getContext().getAsmInfo()->useIntegratedAssembler() ||
+ getContext().getAsmInfo()->binutilsIsAtLeast(2, 35)) {
// Symbols must be placed into sections with compatible entry
// sizes. Generate unique sections for symbols that have not
// been assigned to compatible sections.
@@ -735,8 +737,9 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
assert(Section->getLinkedToSymbol() == LinkedToSym &&
"Associated symbol mismatch between sections");
- if (!getContext().getAsmInfo()->useIntegratedAssembler()) {
- // If we are not using the integrated assembler then this symbol might have
+ if (!(getContext().getAsmInfo()->useIntegratedAssembler() ||
+ getContext().getAsmInfo()->binutilsIsAtLeast(2, 35))) {
+ // If we are using GNU as before 2.35, then this symbol might have
// been placed in an incompatible mergeable section. Emit an error if this
// is the case to avoid creating broken output.
if ((Section->getFlags() & ELF::SHF_MERGE) &&
@@ -831,6 +834,43 @@ MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable(
/* AssociatedSymbol */ nullptr);
}
+MCSection *
+TargetLoweringObjectFileELF::getSectionForLSDA(const Function &F,
+ const TargetMachine &TM) const {
+ // If neither COMDAT nor function sections, use the monolithic LSDA section.
+ // Re-use this path if LSDASection is null as in the Arm EHABI.
+ if (!LSDASection || (!F.hasComdat() && !TM.getFunctionSections()))
+ return LSDASection;
+
+ const auto *LSDA = cast<MCSectionELF>(LSDASection);
+ unsigned Flags = LSDA->getFlags();
+ StringRef Group;
+ if (F.hasComdat()) {
+ Group = F.getComdat()->getName();
+ Flags |= ELF::SHF_GROUP;
+ }
+
+  // Append the function name as a suffix, like GCC does, assuming
+ // -funique-section-names applies to .gcc_except_table sections.
+ if (TM.getUniqueSectionNames())
+ return getContext().getELFSection(LSDA->getName() + "." + F.getName(),
+ LSDA->getType(), Flags, 0, Group,
+ MCSection::NonUniqueID, nullptr);
+
+ // Allocate a unique ID if function sections && (integrated assembler or GNU
+ // as>=2.35). Note we could use SHF_LINK_ORDER to facilitate --gc-sections but
+ // that would require that we know the linker is a modern LLD (12.0 or later).
+ // GNU ld as of 2.35 does not support mixed SHF_LINK_ORDER &
+ // non-SHF_LINK_ORDER components in an output section
+ // https://sourceware.org/bugzilla/show_bug.cgi?id=26256
+ unsigned ID = TM.getFunctionSections() &&
+ getContext().getAsmInfo()->useIntegratedAssembler()
+ ? NextUniqueID++
+ : MCSection::NonUniqueID;
+ return getContext().getELFSection(LSDA->getName(), LSDA->getType(), Flags, 0,
+ Group, ID, nullptr);
+}
+
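
With unique section names, each function's LSDA thus lands in a section named after the function; otherwise the base name is reused and sections are distinguished by unique IDs. A sketch of the naming rule (lsdaSectionName is an illustrative helper, not part of the API):

#include <string>

static std::string lsdaSectionName(const std::string &Fn, bool UniqueNames) {
  // ".gcc_except_table" is the conventional monolithic LSDA section name.
  return UniqueNames ? ".gcc_except_table." + Fn
                     : std::string(".gcc_except_table");
}
// lsdaSectionName("foo", true) == ".gcc_except_table.foo", matching GCC.
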
bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection(
bool UsesLabelDifference, const Function &F) const {
// We can always create relative relocations, so use another section
@@ -865,14 +905,14 @@ MCSection *TargetLoweringObjectFileELF::getSectionForMachineBasicBlock(
assert(MBB.isBeginSection() && "Basic block does not start a section!");
unsigned UniqueID = MCContext::GenericSectionID;
- // For cold sections use the .text.unlikely prefix along with the parent
+ // For cold sections use the .text.split. prefix along with the parent
// function name. All cold blocks for the same function go to the same
// section. Similarly all exception blocks are grouped by symbol name
// under the .text.eh prefix. For regular sections, we either use a unique
// name, or a unique ID for the section.
SmallString<128> Name;
if (MBB.getSectionID() == MBBSectionID::ColdSectionID) {
- Name += ".text.unlikely.";
+ Name += BBSectionsColdTextPrefix;
Name += MBB.getParent()->getName();
} else if (MBB.getSectionID() == MBBSectionID::ExceptionSectionID) {
Name += ".text.eh.";
@@ -888,7 +928,7 @@ MCSection *TargetLoweringObjectFileELF::getSectionForMachineBasicBlock(
}
unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_EXECINSTR;
- std::string GroupName = "";
+ std::string GroupName;
if (F.hasComdat()) {
Flags |= ELF::SHF_GROUP;
GroupName = F.getComdat()->getName().str();
@@ -968,6 +1008,20 @@ const MCExpr *TargetLoweringObjectFileELF::lowerRelativeReference(
MCSymbolRefExpr::create(TM.getSymbol(RHS), getContext()), getContext());
}
+const MCExpr *TargetLoweringObjectFileELF::lowerDSOLocalEquivalent(
+ const DSOLocalEquivalent *Equiv, const TargetMachine &TM) const {
+ assert(supportDSOLocalEquivalentLowering());
+
+ const auto *GV = Equiv->getGlobalValue();
+
+ // A PLT entry is not needed for dso_local globals.
+ if (GV->isDSOLocal() || GV->isImplicitDSOLocal())
+ return MCSymbolRefExpr::create(TM.getSymbol(GV), getContext());
+
+ return MCSymbolRefExpr::create(TM.getSymbol(GV), PLTRelativeVariantKind,
+ getContext());
+}
+
MCSection *TargetLoweringObjectFileELF::getSectionForCommandLines() const {
// Use ".GCC.command.line" since this feature is to support clang's
// -frecord-gcc-switches which in turn attempts to mimic GCC's switch of the
@@ -1515,6 +1569,10 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal(
MCSymbol *Sym = TM.getSymbol(ComdatGV);
StringRef COMDATSymName = Sym->getName();
+ if (const auto *F = dyn_cast<Function>(GO))
+ if (Optional<StringRef> Prefix = F->getSectionPrefix())
+ raw_svector_ostream(Name) << '$' << *Prefix;
+
// Append "$symbol" to the section name *before* IR-level mangling is
// applied when targetting mingw. This is what GCC does, and the ld.bfd
// COFF linker will not properly handle comdats otherwise.
@@ -1590,6 +1648,31 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable(
void TargetLoweringObjectFileCOFF::emitModuleMetadata(MCStreamer &Streamer,
Module &M) const {
+ emitLinkerDirectives(Streamer, M);
+
+ unsigned Version = 0;
+ unsigned Flags = 0;
+ StringRef Section;
+
+ GetObjCImageInfo(M, Version, Flags, Section);
+ if (!Section.empty()) {
+ auto &C = getContext();
+ auto *S = C.getCOFFSection(Section,
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
+ Streamer.SwitchSection(S);
+ Streamer.emitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO")));
+ Streamer.emitInt32(Version);
+ Streamer.emitInt32(Flags);
+ Streamer.AddBlankLine();
+ }
+
+ emitCGProfileMetadata(Streamer, M);
+}
+
+void TargetLoweringObjectFileCOFF::emitLinkerDirectives(
+ MCStreamer &Streamer, Module &M) const {
if (NamedMDNode *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) {
// Emit the linker options to the linker .drectve section. According to the
// spec, this section is a space-separated string containing flags for
@@ -1606,28 +1689,51 @@ void TargetLoweringObjectFileCOFF::emitModuleMetadata(MCStreamer &Streamer,
}
}
- unsigned Version = 0;
- unsigned Flags = 0;
- StringRef Section;
-
- GetObjCImageInfo(M, Version, Flags, Section);
- if (Section.empty())
- return;
+ // Emit /EXPORT: flags for each exported global as necessary.
+ std::string Flags;
+ for (const GlobalValue &GV : M.global_values()) {
+ raw_string_ostream OS(Flags);
+ emitLinkerFlagsForGlobalCOFF(OS, &GV, getTargetTriple(), getMangler());
+ OS.flush();
+ if (!Flags.empty()) {
+ Streamer.SwitchSection(getDrectveSection());
+ Streamer.emitBytes(Flags);
+ }
+ Flags.clear();
+ }
- auto &C = getContext();
- auto *S = C.getCOFFSection(
- Section, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getReadOnly());
- Streamer.SwitchSection(S);
- Streamer.emitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO")));
- Streamer.emitInt32(Version);
- Streamer.emitInt32(Flags);
- Streamer.AddBlankLine();
+ // Emit /INCLUDE: flags for each used global as necessary.
+ if (const auto *LU = M.getNamedGlobal("llvm.used")) {
+ assert(LU->hasInitializer() && "expected llvm.used to have an initializer");
+ assert(isa<ArrayType>(LU->getValueType()) &&
+ "expected llvm.used to be an array type");
+ if (const auto *A = cast<ConstantArray>(LU->getInitializer())) {
+ for (const Value *Op : A->operands()) {
+ const auto *GV = cast<GlobalValue>(Op->stripPointerCasts());
+ // Global symbols with internal or private linkage are not visible to
+ // the linker, and thus would cause an error when the linker tried to
+ // preserve the symbol due to the `/include:` directive.
+ if (GV->hasLocalLinkage())
+ continue;
+
+ raw_string_ostream OS(Flags);
+ emitLinkerFlagsForUsedCOFF(OS, GV, getTargetTriple(), getMangler());
+ OS.flush();
+
+ if (!Flags.empty()) {
+ Streamer.SwitchSection(getDrectveSection());
+ Streamer.emitBytes(Flags);
+ }
+ Flags.clear();
+ }
+ }
+ }
}
void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
TargetLoweringObjectFile::Initialize(Ctx, TM);
+ this->TM = &TM;
const Triple &T = TM.getTargetTriple();
if (T.isWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) {
StaticCtorSection =
@@ -1702,16 +1808,6 @@ MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection(
cast<MCSectionCOFF>(StaticDtorSection));
}
-void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal(
- raw_ostream &OS, const GlobalValue *GV) const {
- emitLinkerFlagsForGlobalCOFF(OS, GV, getTargetTriple(), getMangler());
-}
-
-void TargetLoweringObjectFileCOFF::emitLinkerFlagsForUsed(
- raw_ostream &OS, const GlobalValue *GV) const {
- emitLinkerFlagsForUsedCOFF(OS, GV, getTargetTriple(), getMangler());
-}
-
const MCExpr *TargetLoweringObjectFileCOFF::lowerRelativeReference(
const GlobalValue *LHS, const GlobalValue *RHS,
const TargetMachine &TM) const {
@@ -1882,7 +1978,7 @@ static MCSectionWasm *selectWasmSectionForGlobal(
if (const auto *F = dyn_cast<Function>(GO)) {
const auto &OptionalPrefix = F->getSectionPrefix();
if (OptionalPrefix)
- Name += *OptionalPrefix;
+ raw_svector_ostream(Name) << '.' << *OptionalPrefix;
}
if (EmitUniqueSection && UniqueSectionNames) {
@@ -1970,14 +2066,36 @@ MCSection *TargetLoweringObjectFileWasm::getStaticDtorSection(
//===----------------------------------------------------------------------===//
// XCOFF
//===----------------------------------------------------------------------===//
+bool TargetLoweringObjectFileXCOFF::ShouldEmitEHBlock(
+ const MachineFunction *MF) {
+ if (!MF->getLandingPads().empty())
+ return true;
+
+ const Function &F = MF->getFunction();
+ if (!F.hasPersonalityFn() || !F.needsUnwindTableEntry())
+ return false;
+
+ const Function *Per =
+ dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts());
+ if (isNoOpWithoutInvoke(classifyEHPersonality(Per)))
+ return false;
+
+ return true;
+}
+
+MCSymbol *
+TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(const MachineFunction *MF) {
+ return MF->getMMI().getContext().getOrCreateSymbol(
+ "__ehinfo." + Twine(MF->getFunctionNumber()));
+}
+
MCSymbol *
TargetLoweringObjectFileXCOFF::getTargetSymbol(const GlobalValue *GV,
const TargetMachine &TM) const {
- if (TM.getDataSections())
- report_fatal_error("XCOFF unique data sections not yet implemented");
-
// We always use a qualname symbol for a GV that represents
// a declaration, a function descriptor, or a common symbol.
+ // If a GV represents a GlobalVariable and -fdata-sections is enabled, we
+ // also return a qualname so that a label symbol could be avoided.
// It is inherently ambiguous when the GO represents the address of a
// function, as the GO could either represent a function descriptor or a
// function entry point. We choose to always return a function descriptor
@@ -1992,21 +2110,34 @@ TargetLoweringObjectFileXCOFF::getTargetSymbol(const GlobalValue *GV,
return cast<MCSectionXCOFF>(
getSectionForFunctionDescriptor(cast<Function>(GO), TM))
->getQualNameSymbol();
- if (GOKind.isCommon() || GOKind.isBSSLocal())
+ if ((TM.getDataSections() && !GO->hasSection()) || GOKind.isCommon() ||
+ GOKind.isBSSLocal())
return cast<MCSectionXCOFF>(SectionForGlobal(GO, GOKind, TM))
->getQualNameSymbol();
}
// For all other cases, fall back to getSymbol to return the unqualified name.
- // This could change for a GV that is a GlobalVariable when we decide to
- // support -fdata-sections since we could avoid having label symbols if the
- // linkage name is applied to the csect symbol.
return nullptr;
}
MCSection *TargetLoweringObjectFileXCOFF::getExplicitSectionGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
- report_fatal_error("XCOFF explicit sections not yet implemented.");
+ if (!GO->hasSection())
+ report_fatal_error("#pragma clang section is not yet supported");
+
+ StringRef SectionName = GO->getSection();
+ XCOFF::StorageMappingClass MappingClass;
+ if (Kind.isText())
+ MappingClass = XCOFF::XMC_PR;
+ else if (Kind.isData() || Kind.isReadOnlyWithRel() || Kind.isBSS())
+ MappingClass = XCOFF::XMC_RW;
+ else if (Kind.isReadOnly())
+ MappingClass = XCOFF::XMC_RO;
+ else
+ report_fatal_error("XCOFF other section types not yet implemented.");
+
+ return getContext().getXCOFFSection(SectionName, MappingClass, XCOFF::XTY_SD,
+ Kind, /* MultiSymbolsAllowed*/ true);
}
MCSection *TargetLoweringObjectFileXCOFF::getSectionForExternalReference(
@@ -2016,30 +2147,23 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForExternalReference(
SmallString<128> Name;
getNameWithPrefix(Name, GO, TM);
- XCOFF::StorageClass SC =
- TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO);
// Externals go into a csect of type ER.
return getContext().getXCOFFSection(
Name, isa<Function>(GO) ? XCOFF::XMC_DS : XCOFF::XMC_UA, XCOFF::XTY_ER,
- SC, SectionKind::getMetadata());
+ SectionKind::getMetadata());
}
MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
- assert(!TM.getFunctionSections() && !TM.getDataSections() &&
- "XCOFF unique sections not yet implemented.");
-
// Common symbols go into a csect with matching name which will get mapped
// into the .bss section.
if (Kind.isBSSLocal() || Kind.isCommon()) {
SmallString<128> Name;
getNameWithPrefix(Name, GO, TM);
- XCOFF::StorageClass SC =
- TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO);
return getContext().getXCOFFSection(
Name, Kind.isBSSLocal() ? XCOFF::XMC_BS : XCOFF::XMC_RW, XCOFF::XTY_CM,
- SC, Kind, /* BeginSymbolName */ nullptr);
+ Kind);
}
if (Kind.isMergeableCString()) {
@@ -2051,40 +2175,65 @@ MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal(
SmallString<128> Name;
Name = SizeSpec + utostr(Alignment.value());
+ if (TM.getDataSections())
+ getNameWithPrefix(Name, GO, TM);
+
return getContext().getXCOFFSection(
- Name, XCOFF::XMC_RO, XCOFF::XTY_SD,
- TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO),
- Kind, /* BeginSymbolName */ nullptr);
+ Name, XCOFF::XMC_RO, XCOFF::XTY_SD, Kind,
+ /* MultiSymbolsAllowed*/ !TM.getDataSections());
}
- if (Kind.isText())
+ if (Kind.isText()) {
+ if (TM.getFunctionSections()) {
+ return cast<MCSymbolXCOFF>(getFunctionEntryPointSymbol(GO, TM))
+ ->getRepresentedCsect();
+ }
return TextSection;
+ }
- if (Kind.isData() || Kind.isReadOnlyWithRel())
- // TODO: We may put this under option control, because user may want to
- // have read-only data with relocations placed into a read-only section by
- // the compiler.
- return DataSection;
-
- // Zero initialized data must be emitted to the .data section because external
- // linkage control sections that get mapped to the .bss section will be linked
- // as tentative defintions, which is only appropriate for SectionKind::Common.
- if (Kind.isBSS())
+  // TODO: We may put Kind.isReadOnlyWithRel() under option control, because
+  // the user may want read-only data with relocations placed into a
+  // read-only section by the compiler.
+  // For the BSS kind, zero-initialized data must be emitted to the .data
+  // section because external linkage control sections that get mapped to the
+  // .bss section will be linked as tentative definitions, which is only
+  // appropriate for SectionKind::Common.
+ if (Kind.isData() || Kind.isReadOnlyWithRel() || Kind.isBSS()) {
+ if (TM.getDataSections()) {
+ SmallString<128> Name;
+ getNameWithPrefix(Name, GO, TM);
+ return getContext().getXCOFFSection(Name, XCOFF::XMC_RW, XCOFF::XTY_SD,
+ SectionKind::getData());
+ }
return DataSection;
+ }
- if (Kind.isReadOnly())
+ if (Kind.isReadOnly()) {
+ if (TM.getDataSections()) {
+ SmallString<128> Name;
+ getNameWithPrefix(Name, GO, TM);
+ return getContext().getXCOFFSection(Name, XCOFF::XMC_RO, XCOFF::XTY_SD,
+ SectionKind::getReadOnly());
+ }
return ReadOnlySection;
+ }
report_fatal_error("XCOFF other section types not yet implemented.");
}
MCSection *TargetLoweringObjectFileXCOFF::getSectionForJumpTable(
const Function &F, const TargetMachine &TM) const {
- assert (!TM.getFunctionSections() && "Unique sections not supported on XCOFF"
- " yet.");
assert (!F.getComdat() && "Comdat not supported on XCOFF.");
- //TODO: Enable emiting jump table to unique sections when we support it.
- return ReadOnlySection;
+
+ if (!TM.getFunctionSections())
+ return ReadOnlySection;
+
+ // If the function can be removed, produce a unique section so that
+ // the table doesn't prevent the removal.
+ SmallString<128> NameStr(".rodata.jmp..");
+ getNameWithPrefix(NameStr, &F, TM);
+ return getContext().getXCOFFSection(NameStr, XCOFF::XMC_RO, XCOFF::XTY_SD,
+ SectionKind::getReadOnly());
}
bool TargetLoweringObjectFileXCOFF::shouldPutJumpTableInFunctionSection(
@@ -2104,19 +2253,23 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForConstant(
void TargetLoweringObjectFileXCOFF::Initialize(MCContext &Ctx,
const TargetMachine &TgtM) {
TargetLoweringObjectFile::Initialize(Ctx, TgtM);
- TTypeEncoding = 0;
+ TTypeEncoding =
+ dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_datarel |
+ (TgtM.getTargetTriple().isArch32Bit() ? dwarf::DW_EH_PE_sdata4
+ : dwarf::DW_EH_PE_sdata8);
PersonalityEncoding = 0;
LSDAEncoding = 0;
+ CallSiteEncoding = dwarf::DW_EH_PE_udata4;
}
MCSection *TargetLoweringObjectFileXCOFF::getStaticCtorSection(
- unsigned Priority, const MCSymbol *KeySym) const {
- report_fatal_error("XCOFF ctor section not yet implemented.");
+ unsigned Priority, const MCSymbol *KeySym) const {
+ report_fatal_error("no static constructor section on AIX");
}
MCSection *TargetLoweringObjectFileXCOFF::getStaticDtorSection(
- unsigned Priority, const MCSymbol *KeySym) const {
- report_fatal_error("XCOFF dtor section not yet implemented.");
+ unsigned Priority, const MCSymbol *KeySym) const {
+ report_fatal_error("no static destructor section on AIX");
}
const MCExpr *TargetLoweringObjectFileXCOFF::lowerRelativeReference(
@@ -2125,9 +2278,11 @@ const MCExpr *TargetLoweringObjectFileXCOFF::lowerRelativeReference(
report_fatal_error("XCOFF not yet implemented.");
}
-XCOFF::StorageClass TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(
- const GlobalObject *GO) {
- switch (GO->getLinkage()) {
+XCOFF::StorageClass
+TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(const GlobalValue *GV) {
+ assert(!isa<GlobalIFunc>(GV) && "GlobalIFunc is not supported on AIX.");
+
+ switch (GV->getLinkage()) {
case GlobalValue::InternalLinkage:
case GlobalValue::PrivateLinkage:
return XCOFF::C_HIDEXT;
@@ -2149,10 +2304,32 @@ XCOFF::StorageClass TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(
}
MCSymbol *TargetLoweringObjectFileXCOFF::getFunctionEntryPointSymbol(
- const Function *F, const TargetMachine &TM) const {
+ const GlobalValue *Func, const TargetMachine &TM) const {
+ assert(
+ (isa<Function>(Func) ||
+ (isa<GlobalAlias>(Func) &&
+ isa_and_nonnull<Function>(cast<GlobalAlias>(Func)->getBaseObject()))) &&
+ "Func must be a function or an alias which has a function as base "
+ "object.");
+
SmallString<128> NameStr;
NameStr.push_back('.');
- getNameWithPrefix(NameStr, F, TM);
+ getNameWithPrefix(NameStr, Func, TM);
+
+  // When -function-sections is enabled and no explicit section is specified,
+  // it's no longer necessary to emit a function entry point label; we use the
+  // function entry point csect instead. For function declarations, the
+  // undefined symbols get treated as csects with the XTY_ER property.
+ if (((TM.getFunctionSections() && !Func->hasSection()) ||
+ Func->isDeclaration()) &&
+ isa<Function>(Func)) {
+ return getContext()
+ .getXCOFFSection(NameStr, XCOFF::XMC_PR,
+ Func->isDeclaration() ? XCOFF::XTY_ER : XCOFF::XTY_SD,
+ SectionKind::getText())
+ ->getQualNameSymbol();
+ }
+
return getContext().getOrCreateSymbol(NameStr);
}
@@ -2161,13 +2338,15 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForFunctionDescriptor(
SmallString<128> NameStr;
getNameWithPrefix(NameStr, F, TM);
return getContext().getXCOFFSection(NameStr, XCOFF::XMC_DS, XCOFF::XTY_SD,
- getStorageClassForGlobal(F),
SectionKind::getData());
}
MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry(
- const MCSymbol *Sym) const {
+ const MCSymbol *Sym, const TargetMachine &TM) const {
+ // Use TE storage-mapping class when large code model is enabled so that
+ // the chance of needing -bbigtoc is decreased.
return getContext().getXCOFFSection(
- cast<MCSymbolXCOFF>(Sym)->getSymbolTableName(), XCOFF::XMC_TC,
- XCOFF::XTY_SD, XCOFF::C_HIDEXT, SectionKind::getData());
+ cast<MCSymbolXCOFF>(Sym)->getSymbolTableName(),
+ TM.getCodeModel() == CodeModel::Large ? XCOFF::XMC_TE : XCOFF::XMC_TC,
+ XCOFF::XTY_SD, SectionKind::getData());
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp
index 4866d4c171c0..0731cf9b28f4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp
@@ -47,7 +47,11 @@ bool TargetOptions::HonorSignDependentRoundingFPMath() const {
}
/// NOTE: There are targets that still do not support the debug entry values
-/// production.
+/// production; that is controlled with SupportsDebugEntryValues.
+/// In addition, the SCE debugger does not have the feature implemented, so
+/// prefer not to emit debug entry values in that case.
+/// EnableDebugEntryValues can be used for testing purposes.
bool TargetOptions::ShouldEmitDebugEntryValues() const {
- return SupportsDebugEntryValues || EnableDebugEntryValues;
+ return (SupportsDebugEntryValues && DebuggerTuning != DebuggerKind::SCE) ||
+ EnableDebugEntryValues;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
index e0fdb0cefcb8..e844d03854e2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -29,6 +29,7 @@
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/PassInstrumentation.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -41,6 +42,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/Threading.h"
+#include "llvm/Target/CGPassBuilderOption.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
@@ -120,16 +122,22 @@ static cl::opt<cl::boolOrDefault> DebugifyAndStripAll(
"Debugify MIR before and Strip debug after "
"each pass except those known to be unsafe when debug info is present"),
cl::ZeroOrMore);
-enum RunOutliner { AlwaysOutline, NeverOutline, TargetDefault };
+static cl::opt<cl::boolOrDefault> DebugifyCheckAndStripAll(
+ "debugify-check-and-strip-all-safe", cl::Hidden,
+ cl::desc(
+ "Debugify MIR before, by checking and stripping the debug info after, "
+ "each pass except those known to be unsafe when debug info is present"),
+ cl::ZeroOrMore);
// Enable or disable the MachineOutliner.
static cl::opt<RunOutliner> EnableMachineOutliner(
"enable-machine-outliner", cl::desc("Enable the machine outliner"),
- cl::Hidden, cl::ValueOptional, cl::init(TargetDefault),
- cl::values(clEnumValN(AlwaysOutline, "always",
+ cl::Hidden, cl::ValueOptional, cl::init(RunOutliner::TargetDefault),
+ cl::values(clEnumValN(RunOutliner::AlwaysOutline, "always",
"Run on all functions guaranteed to be beneficial"),
- clEnumValN(NeverOutline, "never", "Disable all outlining"),
+ clEnumValN(RunOutliner::NeverOutline, "never",
+ "Disable all outlining"),
// Sentinel value for unspecified option.
- clEnumValN(AlwaysOutline, "", "")));
+ clEnumValN(RunOutliner::AlwaysOutline, "", "")));
// Enable or disable FastISel. Both options are needed, because
// FastISel is enabled by default with -fast, and we wish to be
// able to enable or disable fast-isel independently from -O0.
@@ -141,9 +149,11 @@ static cl::opt<cl::boolOrDefault> EnableGlobalISelOption(
"global-isel", cl::Hidden,
cl::desc("Enable the \"global\" instruction selector"));
-static cl::opt<std::string> PrintMachineInstrs(
- "print-machineinstrs", cl::ValueOptional, cl::desc("Print machine instrs"),
- cl::value_desc("pass-name"), cl::init("option-unspecified"), cl::Hidden);
+// FIXME: remove this after switching to NPM or GlobalISel, whichever gets there
+// first...
+static cl::opt<bool>
+ PrintAfterISel("print-after-isel", cl::init(false), cl::Hidden,
+ cl::desc("Print machine instrs after ISel"));
static cl::opt<GlobalISelAbortMode> EnableGlobalISelAbort(
"global-isel-abort", cl::Hidden,
@@ -170,7 +180,6 @@ static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden,
cl::desc("Run live interval analysis earlier in the pipeline"));
// Experimental option to use CFL-AA in codegen
-enum class CFLAAType { None, Steensgaard, Andersen, Both };
static cl::opt<CFLAAType> UseCFLAA(
"use-cfl-aa-in-codegen", cl::init(CFLAAType::None), cl::Hidden,
cl::desc("Enable the new, experimental CFL alias analysis in CodeGen"),
@@ -210,6 +219,17 @@ static cl::opt<std::string>
cl::desc("Stop compilation before a specific pass"),
cl::value_desc("pass-name"), cl::init(""), cl::Hidden);
+/// Enable the machine function splitter pass.
+static cl::opt<bool> EnableMachineFunctionSplitter(
+ "enable-split-machine-functions", cl::Hidden,
+ cl::desc("Split out cold blocks from machine functions based on profile "
+ "information."));
+
+/// Disable the expand reductions pass for testing.
+static cl::opt<bool> DisableExpandReductions(
+ "disable-expand-reductions", cl::init(false), cl::Hidden,
+ cl::desc("Disable the expand reduction intrinsics pass from running"));
+
/// Allow standard passes to be disabled by command line options. This supports
/// simple binary flags that either suppress the pass or do nothing.
/// i.e. -disable-mypass=false has no effect.
@@ -294,12 +314,11 @@ struct InsertedPass {
AnalysisID TargetPassID;
IdentifyingPassPtr InsertedPassID;
bool VerifyAfter;
- bool PrintAfter;
InsertedPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID,
- bool VerifyAfter, bool PrintAfter)
+ bool VerifyAfter)
: TargetPassID(TargetPassID), InsertedPassID(InsertedPassID),
- VerifyAfter(VerifyAfter), PrintAfter(PrintAfter) {}
+ VerifyAfter(VerifyAfter) {}
Pass *getInsertedPass() const {
assert(InsertedPassID.isValid() && "Illegal Pass ID!");
@@ -397,6 +416,145 @@ void TargetPassConfig::setStartStopPasses() {
Started = (StartAfter == nullptr) && (StartBefore == nullptr);
}
+CGPassBuilderOption llvm::getCGPassBuilderOption() {
+ CGPassBuilderOption Opt;
+
+#define SET_OPTION(Option) \
+ if (Option.getNumOccurrences()) \
+ Opt.Option = Option;
+
+ SET_OPTION(EnableFastISelOption)
+ SET_OPTION(EnableGlobalISelAbort)
+ SET_OPTION(EnableGlobalISelOption)
+ SET_OPTION(EnableIPRA)
+ SET_OPTION(OptimizeRegAlloc)
+ SET_OPTION(VerifyMachineCode)
+
+#define SET_BOOLEAN_OPTION(Option) Opt.Option = Option;
+
+ SET_BOOLEAN_OPTION(EarlyLiveIntervals)
+ SET_BOOLEAN_OPTION(EnableBlockPlacementStats)
+ SET_BOOLEAN_OPTION(EnableImplicitNullChecks)
+ SET_BOOLEAN_OPTION(EnableMachineOutliner)
+ SET_BOOLEAN_OPTION(MISchedPostRA)
+ SET_BOOLEAN_OPTION(UseCFLAA)
+ SET_BOOLEAN_OPTION(DisableMergeICmps)
+ SET_BOOLEAN_OPTION(DisableLSR)
+ SET_BOOLEAN_OPTION(DisableConstantHoisting)
+ SET_BOOLEAN_OPTION(DisableCGP)
+ SET_BOOLEAN_OPTION(DisablePartialLibcallInlining)
+ SET_BOOLEAN_OPTION(PrintLSR)
+ SET_BOOLEAN_OPTION(PrintISelInput)
+ SET_BOOLEAN_OPTION(PrintGCInfo)
+
+ return Opt;
+}
+
+static void registerPartialPipelineCallback(PassInstrumentationCallbacks &PIC,
+ LLVMTargetMachine &LLVMTM) {
+ StringRef StartBefore;
+ StringRef StartAfter;
+ StringRef StopBefore;
+ StringRef StopAfter;
+
+ unsigned StartBeforeInstanceNum = 0;
+ unsigned StartAfterInstanceNum = 0;
+ unsigned StopBeforeInstanceNum = 0;
+ unsigned StopAfterInstanceNum = 0;
+
+ std::tie(StartBefore, StartBeforeInstanceNum) =
+ getPassNameAndInstanceNum(StartBeforeOpt);
+ std::tie(StartAfter, StartAfterInstanceNum) =
+ getPassNameAndInstanceNum(StartAfterOpt);
+ std::tie(StopBefore, StopBeforeInstanceNum) =
+ getPassNameAndInstanceNum(StopBeforeOpt);
+ std::tie(StopAfter, StopAfterInstanceNum) =
+ getPassNameAndInstanceNum(StopAfterOpt);
+
+ if (StartBefore.empty() && StartAfter.empty() && StopBefore.empty() &&
+ StopAfter.empty())
+ return;
+
+ std::tie(StartBefore, std::ignore) =
+ LLVMTM.getPassNameFromLegacyName(StartBefore);
+ std::tie(StartAfter, std::ignore) =
+ LLVMTM.getPassNameFromLegacyName(StartAfter);
+ std::tie(StopBefore, std::ignore) =
+ LLVMTM.getPassNameFromLegacyName(StopBefore);
+ std::tie(StopAfter, std::ignore) =
+ LLVMTM.getPassNameFromLegacyName(StopAfter);
+ if (!StartBefore.empty() && !StartAfter.empty())
+ report_fatal_error(Twine(StartBeforeOptName) + Twine(" and ") +
+ Twine(StartAfterOptName) + Twine(" specified!"));
+ if (!StopBefore.empty() && !StopAfter.empty())
+ report_fatal_error(Twine(StopBeforeOptName) + Twine(" and ") +
+ Twine(StopAfterOptName) + Twine(" specified!"));
+
+ PIC.registerShouldRunOptionalPassCallback(
+ [=, EnableCurrent = StartBefore.empty() && StartAfter.empty(),
+ EnableNext = Optional<bool>(), StartBeforeCount = 0u,
+ StartAfterCount = 0u, StopBeforeCount = 0u,
+ StopAfterCount = 0u](StringRef P, Any) mutable {
+ bool StartBeforePass = !StartBefore.empty() && P.contains(StartBefore);
+ bool StartAfterPass = !StartAfter.empty() && P.contains(StartAfter);
+ bool StopBeforePass = !StopBefore.empty() && P.contains(StopBefore);
+ bool StopAfterPass = !StopAfter.empty() && P.contains(StopAfter);
+
+ // Implement -start-after/-stop-after
+ if (EnableNext) {
+ EnableCurrent = *EnableNext;
+ EnableNext.reset();
+ }
+
+ // Using PIC.registerAfterPassCallback won't work because if this
+ // callback returns false, AfterPassCallback is also skipped.
+ if (StartAfterPass && StartAfterCount++ == StartAfterInstanceNum) {
+ assert(!EnableNext && "Error: assign to EnableNext more than once");
+ EnableNext = true;
+ }
+ if (StopAfterPass && StopAfterCount++ == StopAfterInstanceNum) {
+ assert(!EnableNext && "Error: assign to EnableNext more than once");
+ EnableNext = false;
+ }
+
+ if (StartBeforePass && StartBeforeCount++ == StartBeforeInstanceNum)
+ EnableCurrent = true;
+ if (StopBeforePass && StopBeforeCount++ == StopBeforeInstanceNum)
+ EnableCurrent = false;
+ return EnableCurrent;
+ });
+}
+
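
The -start-*/-stop-* options handled above accept "<pass-name>[,<instance>]", where the zero-based instance number selects among repeated runs of the same pass (as the Count++ comparisons show). A sketch of that parsing; the in-tree helper is getPassNameAndInstanceNum, and this stand-in only mimics its shape:

#include <string>
#include <utility>

static std::pair<std::string, unsigned>
parsePassNameAndInstance(const std::string &S) {
  std::string::size_type Comma = S.find(',');
  if (Comma == std::string::npos)
    return {S, 0}; // no instance given: match the first occurrence
  return {S.substr(0, Comma),
          static_cast<unsigned>(std::stoul(S.substr(Comma + 1)))};
}
// parsePassNameAndInstance("machine-scheduler,1") -> {"machine-scheduler", 1},
// i.e. the second run of machine-scheduler in the pipeline.
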
+void llvm::registerCodeGenCallback(PassInstrumentationCallbacks &PIC,
+ LLVMTargetMachine &LLVMTM) {
+
+ // Register a callback for disabling passes.
+ PIC.registerShouldRunOptionalPassCallback([](StringRef P, Any) {
+
+#define DISABLE_PASS(Option, Name) \
+ if (Option && P.contains(#Name)) \
+ return false;
+ DISABLE_PASS(DisableBlockPlacement, MachineBlockPlacementPass)
+ DISABLE_PASS(DisableBranchFold, BranchFolderPass)
+ DISABLE_PASS(DisableCopyProp, MachineCopyPropagationPass)
+ DISABLE_PASS(DisableEarlyIfConversion, EarlyIfConverterPass)
+ DISABLE_PASS(DisableEarlyTailDup, EarlyTailDuplicatePass)
+ DISABLE_PASS(DisableMachineCSE, MachineCSEPass)
+ DISABLE_PASS(DisableMachineDCE, DeadMachineInstructionElimPass)
+ DISABLE_PASS(DisableMachineLICM, EarlyMachineLICMPass)
+ DISABLE_PASS(DisableMachineSink, MachineSinkingPass)
+ DISABLE_PASS(DisablePostRAMachineLICM, MachineLICMPass)
+ DISABLE_PASS(DisablePostRAMachineSink, PostRAMachineSinkingPass)
+ DISABLE_PASS(DisablePostRASched, PostRASchedulerPass)
+ DISABLE_PASS(DisableSSC, StackSlotColoringPass)
+ DISABLE_PASS(DisableTailDuplicate, TailDuplicatePass)
+
+ return true;
+ });
+
+ registerPartialPipelineCallback(PIC, LLVMTM);
+}
+
// Out of line constructor provides default values for pass options and
// registers all common codegen passes.
TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm)
@@ -411,9 +569,6 @@ TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm)
initializeBasicAAWrapperPassPass(*PassRegistry::getPassRegistry());
initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry());
- if (StringRef(PrintMachineInstrs.getValue()).equals(""))
- TM.Options.PrintMachineCode = true;
-
if (EnableIPRA.getNumOccurrences())
TM.Options.EnableIPRA = EnableIPRA;
else {
@@ -437,14 +592,13 @@ CodeGenOpt::Level TargetPassConfig::getOptLevel() const {
/// Insert InsertedPassID pass after TargetPassID.
void TargetPassConfig::insertPass(AnalysisID TargetPassID,
IdentifyingPassPtr InsertedPassID,
- bool VerifyAfter, bool PrintAfter) {
+ bool VerifyAfter) {
assert(((!InsertedPassID.isInstance() &&
TargetPassID != InsertedPassID.getID()) ||
(InsertedPassID.isInstance() &&
TargetPassID != InsertedPassID.getInstance()->getPassID())) &&
"Insert a pass after itself!");
- Impl->InsertedPasses.emplace_back(TargetPassID, InsertedPassID, VerifyAfter,
- PrintAfter);
+ Impl->InsertedPasses.emplace_back(TargetPassID, InsertedPassID, VerifyAfter);
}
/// createPassConfig - Create a pass configuration object to be used by
@@ -522,7 +676,7 @@ bool TargetPassConfig::isPassSubstitutedOrOverridden(AnalysisID ID) const {
/// a later pass or that it should stop after an earlier pass, then do not add
/// the pass. Finally, compare the current pass against the StartAfter
/// and StopAfter options and change the Started/Stopped flags accordingly.
-void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) {
+void TargetPassConfig::addPass(Pass *P, bool verifyAfter) {
assert(!Initialized && "PassConfig is immutable");
// Cache the Pass ID here in case the pass manager finds this pass is
@@ -540,17 +694,16 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) {
addMachinePrePasses();
std::string Banner;
// Construct banner message before PM->add() as that may delete the pass.
- if (AddingMachinePasses && (printAfter || verifyAfter))
+ if (AddingMachinePasses && verifyAfter)
Banner = std::string("After ") + std::string(P->getPassName());
PM->add(P);
if (AddingMachinePasses)
- addMachinePostPasses(Banner, /*AllowPrint*/ printAfter,
- /*AllowVerify*/ verifyAfter);
+ addMachinePostPasses(Banner, /*AllowVerify*/ verifyAfter);
// Add the passes after the pass P if there is any.
- for (auto IP : Impl->InsertedPasses) {
+ for (const auto &IP : Impl->InsertedPasses) {
if (IP.TargetPassID == PassID)
- addPass(IP.getInsertedPass(), IP.VerifyAfter, IP.PrintAfter);
+ addPass(IP.getInsertedPass(), IP.VerifyAfter);
}
} else {
delete P;
@@ -570,8 +723,7 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) {
///
/// addPass cannot return a pointer to the pass instance because it is internal
/// to the PassManager and the instance we create here may already be freed.
-AnalysisID TargetPassConfig::addPass(AnalysisID PassID, bool verifyAfter,
- bool printAfter) {
+AnalysisID TargetPassConfig::addPass(AnalysisID PassID, bool verifyAfter) {
IdentifyingPassPtr TargetID = getPassSubstitution(PassID);
IdentifyingPassPtr FinalPtr = overridePass(PassID, TargetID);
if (!FinalPtr.isValid())
@@ -586,7 +738,7 @@ AnalysisID TargetPassConfig::addPass(AnalysisID PassID, bool verifyAfter,
llvm_unreachable("Pass ID not registered");
}
AnalysisID FinalID = P->getPassID();
- addPass(P, verifyAfter, printAfter); // Ends the lifetime of P.
+ addPass(P, verifyAfter); // Ends the lifetime of P.
return FinalID;
}
@@ -597,7 +749,7 @@ void TargetPassConfig::printAndVerify(const std::string &Banner) {
}
void TargetPassConfig::addPrintPass(const std::string &Banner) {
- if (TM->shouldPrintMachineCode())
+ if (PrintAfterISel)
PM->add(createMachineFunctionPrinterPass(dbgs(), Banner));
}
@@ -619,18 +771,26 @@ void TargetPassConfig::addStripDebugPass() {
PM->add(createStripDebugMachineModulePass(/*OnlyDebugified=*/true));
}
+void TargetPassConfig::addCheckDebugPass() {
+ PM->add(createCheckDebugMachineModulePass());
+}
+
void TargetPassConfig::addMachinePrePasses(bool AllowDebugify) {
- if (AllowDebugify && DebugifyAndStripAll == cl::BOU_TRUE && DebugifyIsSafe)
+ if (AllowDebugify && DebugifyIsSafe &&
+ (DebugifyAndStripAll == cl::BOU_TRUE ||
+ DebugifyCheckAndStripAll == cl::BOU_TRUE))
addDebugifyPass();
}
void TargetPassConfig::addMachinePostPasses(const std::string &Banner,
- bool AllowPrint, bool AllowVerify,
- bool AllowStrip) {
- if (DebugifyAndStripAll == cl::BOU_TRUE && DebugifyIsSafe)
- addStripDebugPass();
- if (AllowPrint)
- addPrintPass(Banner);
+ bool AllowVerify, bool AllowStrip) {
+ if (DebugifyIsSafe) {
+ if (DebugifyCheckAndStripAll == cl::BOU_TRUE) {
+ addCheckDebugPass();
+ addStripDebugPass();
+ } else if (DebugifyAndStripAll == cl::BOU_TRUE)
+ addStripDebugPass();
+ }
if (AllowVerify)
addVerifyPass(Banner);
}
@@ -707,10 +867,12 @@ void TargetPassConfig::addIRPasses() {
// Add scalarization of target's unsupported masked memory intrinsics pass.
// the unsupported intrinsic will be replaced with a chain of basic blocks,
// that stores/loads element one-by-one if the appropriate mask bit is set.
- addPass(createScalarizeMaskedMemIntrinPass());
+ addPass(createScalarizeMaskedMemIntrinLegacyPass());
// Expand reduction intrinsics into shuffle sequences if the target wants to.
- addPass(createExpandReductionsPass());
+ // Allow disabling it for testing purposes.
+ if (!DisableExpandReductions)
+ addPass(createExpandReductionsPass());
}
/// Turn exception handling constructs into something the code generators can
@@ -730,6 +892,7 @@ void TargetPassConfig::addPassesToHandleExceptions() {
LLVM_FALLTHROUGH;
case ExceptionHandling::DwarfCFI:
case ExceptionHandling::ARM:
+ case ExceptionHandling::AIX:
addPass(createDwarfEHPass(getOptLevel()));
break;
case ExceptionHandling::WinEH:
@@ -879,7 +1042,7 @@ bool TargetPassConfig::addISelPasses() {
addPass(createLowerEmuTLSPass());
addPass(createPreISelIntrinsicLoweringPass());
- addPass(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
+ PM->add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
addIRPasses();
addCodeGenPrepare();
addPassesToHandleExceptions();
@@ -916,20 +1079,6 @@ static cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
void TargetPassConfig::addMachinePasses() {
AddingMachinePasses = true;
- // Insert a machine instr printer pass after the specified pass.
- StringRef PrintMachineInstrsPassName = PrintMachineInstrs.getValue();
- if (!PrintMachineInstrsPassName.equals("") &&
- !PrintMachineInstrsPassName.equals("option-unspecified")) {
- if (const PassInfo *TPI = getPassInfo(PrintMachineInstrsPassName)) {
- const PassRegistry *PR = PassRegistry::getPassRegistry();
- const PassInfo *IPI = PR->getPassInfo(StringRef("machineinstr-printer"));
- assert(IPI && "failed to get \"machineinstr-printer\" PassInfo!");
- const char *TID = (const char *)(TPI->getTypeInfo());
- const char *IID = (const char *)(IPI->getTypeInfo());
- insertPass(TID, IID);
- }
- }
-
// Add passes that optimize machine instructions in SSA form.
if (getOptLevel() != CodeGenOpt::None) {
addMachineSSAOptimization();
@@ -1000,7 +1149,7 @@ void TargetPassConfig::addMachinePasses() {
// GC
if (addGCPasses()) {
if (PrintGCInfo)
- addPass(createGCInfoPrinter(dbgs()), false, false);
+ addPass(createGCInfoPrinter(dbgs()), false);
}
// Basic block placement.
@@ -1028,20 +1177,31 @@ void TargetPassConfig::addMachinePasses() {
addPass(&LiveDebugValuesID, false);
if (TM->Options.EnableMachineOutliner && getOptLevel() != CodeGenOpt::None &&
- EnableMachineOutliner != NeverOutline) {
- bool RunOnAllFunctions = (EnableMachineOutliner == AlwaysOutline);
- bool AddOutliner = RunOnAllFunctions ||
- TM->Options.SupportsDefaultOutlining;
+ EnableMachineOutliner != RunOutliner::NeverOutline) {
+ bool RunOnAllFunctions =
+ (EnableMachineOutliner == RunOutliner::AlwaysOutline);
+ bool AddOutliner =
+ RunOnAllFunctions || TM->Options.SupportsDefaultOutlining;
if (AddOutliner)
addPass(createMachineOutlinerPass(RunOnAllFunctions));
}
- if (TM->getBBSectionsType() != llvm::BasicBlockSection::None)
- addPass(llvm::createBBSectionsPreparePass(TM->getBBSectionsFuncListBuf()));
+  // Machine function splitter uses the basic block sections feature. The two
+  // cannot be enabled at the same time.
+ if (TM->Options.EnableMachineFunctionSplitter ||
+ EnableMachineFunctionSplitter) {
+ addPass(createMachineFunctionSplitterPass());
+ } else if (TM->getBBSectionsType() != llvm::BasicBlockSection::None) {
+ addPass(llvm::createBasicBlockSectionsPass(TM->getBBSectionsFuncListBuf()));
+ }
// Add passes that directly emit MI after all other MI passes.
addPreEmitPass2();
+ // Insert pseudo probe annotation for callsite profiling
+ if (TM->Options.PseudoProbeForProfiling)
+ addPass(createPseudoProbeInserter());
+
AddingMachinePasses = false;
}
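// Sketch of how the pseudo probe path above is reached (assuming the usual
// driver spelling): clang -O2 -fpseudo-probe-for-profiling sets
// TargetOptions::PseudoProbeForProfiling, so createPseudoProbeInserter() runs
// after addPreEmitPass2() to annotate callsites for sample profiling.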
@@ -1148,7 +1308,7 @@ FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) {
return createTargetRegisterAllocator(Optimized);
}
-bool TargetPassConfig::addRegAssignmentFast() {
+bool TargetPassConfig::addRegAssignAndRewriteFast() {
if (RegAlloc != &useDefaultRegisterAllocator &&
RegAlloc != &createFastRegisterAllocator)
report_fatal_error("Must use fast (default) register allocator for unoptimized regalloc.");
@@ -1157,7 +1317,7 @@ bool TargetPassConfig::addRegAssignmentFast() {
return true;
}
-bool TargetPassConfig::addRegAssignmentOptimized() {
+bool TargetPassConfig::addRegAssignAndRewriteOptimized() {
// Add the selected register allocation pass.
addPass(createRegAllocPass(true));
@@ -1167,12 +1327,6 @@ bool TargetPassConfig::addRegAssignmentOptimized() {
// Finally rewrite virtual registers.
addPass(&VirtRegRewriterID);
- // Perform stack slot coloring and post-ra machine LICM.
- //
- // FIXME: Re-enable coloring with register when it's capable of adding
- // kill markers.
- addPass(&StackSlotColoringID);
-
return true;
}
@@ -1188,7 +1342,7 @@ void TargetPassConfig::addFastRegAlloc() {
addPass(&PHIEliminationID, false);
addPass(&TwoAddressInstructionPassID, false);
- addRegAssignmentFast();
+ addRegAssignAndRewriteFast();
}
/// Add standard target-independent passes that are tightly coupled with
@@ -1205,6 +1359,11 @@ void TargetPassConfig::addOptimizedRegAlloc() {
// LiveVariables can be removed completely, and LiveIntervals can be directly
// computed. (We still either need to regenerate kill flags after regalloc, or
// preferably fix the scavenger to not depend on them).
+  // FIXME: UnreachableMachineBlockElim is a dependent pass of LiveVariables.
+  // When LiveVariables is removed, this has to be removed/moved as well.
+ // Explicit addition of UnreachableMachineBlockElim allows stopping before or
+ // after it with -stop-before/-stop-after.
+ addPass(&UnreachableMachineBlockElimID, false);
addPass(&LiveVariablesID, false);
// Edge splitting is smarter with machine loop info.
@@ -1226,7 +1385,13 @@ void TargetPassConfig::addOptimizedRegAlloc() {
// PreRA instruction scheduling.
addPass(&MachineSchedulerID);
- if (addRegAssignmentOptimized()) {
+ if (addRegAssignAndRewriteOptimized()) {
+ // Perform stack slot coloring and post-ra machine LICM.
+ //
+ // FIXME: Re-enable coloring with register when it's capable of adding
+ // kill markers.
+ addPass(&StackSlotColoringID);
+
// Allow targets to expand pseudo instructions depending on the choice of
// registers before MachineCopyPropagation.
addPostRewrite();
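// Resulting pass order in the optimized pipeline, as a sketch:
//   MachineScheduler -> createRegAllocPass(true) -> VirtRegRewriter
//   -> StackSlotColoring -> addPostRewrite() -> MachineCopyPropagation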
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp
index e2ef12d8ac77..5fd7eef5808f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Attributes.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
@@ -68,7 +69,7 @@ bool TargetRegisterInfo::shouldRegionSplitForVirtReg(
const MachineFunction &MF, const LiveInterval &VirtReg) const {
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
const MachineRegisterInfo &MRI = MF.getRegInfo();
- MachineInstr *MI = MRI.getUniqueVRegDef(VirtReg.reg);
+ MachineInstr *MI = MRI.getUniqueVRegDef(VirtReg.reg());
if (MI && TII->isTriviallyReMaterializable(*MI) &&
VirtReg.size() > HugeSizeForSplit)
return false;
@@ -532,6 +533,56 @@ TargetRegisterInfo::lookThruCopyLike(Register SrcReg,
}
}
+Register TargetRegisterInfo::lookThruSingleUseCopyChain(
+ Register SrcReg, const MachineRegisterInfo *MRI) const {
+ while (true) {
+ const MachineInstr *MI = MRI->getVRegDef(SrcReg);
+ // Found the real definition, return it if it has a single use.
+ if (!MI->isCopyLike())
+ return MRI->hasOneNonDBGUse(SrcReg) ? SrcReg : Register();
+
+ Register CopySrcReg;
+ if (MI->isCopy())
+ CopySrcReg = MI->getOperand(1).getReg();
+ else {
+      assert(MI->isSubregToReg() &&
+             "Bad opcode for lookThruSingleUseCopyChain");
+ CopySrcReg = MI->getOperand(2).getReg();
+ }
+
+ // Continue only if the next definition in the chain is for a virtual
+ // register that has a single use.
+ if (!CopySrcReg.isVirtual() || !MRI->hasOneNonDBGUse(CopySrcReg))
+ return Register();
+
+ SrcReg = CopySrcReg;
+ }
+}
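// A worked example (a sketch, with generic opcodes): given the single-use chain
//   %1 = ADDrr ...                          ; the real definition
//   %2 = COPY %1
//   %3 = SUBREG_TO_REG 0, %2, %subreg.sub0
// lookThruSingleUseCopyChain(%3, MRI) follows the two copy-like defs and
// returns %1; if any register on the chain had a second non-debug use, it
// would return the invalid Register() instead.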
+
+void TargetRegisterInfo::getOffsetOpcodes(
+ const StackOffset &Offset, SmallVectorImpl<uint64_t> &Ops) const {
+ assert(!Offset.getScalable() && "Scalable offsets are not handled");
+ DIExpression::appendOffset(Ops, Offset.getFixed());
+}
+
+DIExpression *
+TargetRegisterInfo::prependOffsetExpression(const DIExpression *Expr,
+ unsigned PrependFlags,
+ const StackOffset &Offset) const {
+ assert((PrependFlags &
+ ~(DIExpression::DerefBefore | DIExpression::DerefAfter |
+ DIExpression::StackValue | DIExpression::EntryValue)) == 0 &&
+ "Unsupported prepend flag");
+ SmallVector<uint64_t, 16> OffsetExpr;
+ if (PrependFlags & DIExpression::DerefBefore)
+ OffsetExpr.push_back(dwarf::DW_OP_deref);
+ getOffsetOpcodes(Offset, OffsetExpr);
+ if (PrependFlags & DIExpression::DerefAfter)
+ OffsetExpr.push_back(dwarf::DW_OP_deref);
+ return DIExpression::prependOpcodes(Expr, OffsetExpr,
+ PrependFlags & DIExpression::StackValue,
+ PrependFlags & DIExpression::EntryValue);
+}
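// Worked example (a sketch): for a fixed StackOffset of 16 with
// PrependFlags = DIExpression::DerefBefore, OffsetExpr is built as
//   { DW_OP_deref, DW_OP_plus_uconst, 16 }
// and prepended to Expr; with DerefAfter, the DW_OP_deref would instead
// follow the offset opcodes emitted by getOffsetOpcodes().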
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD
void TargetRegisterInfo::dumpReg(Register Reg, unsigned SubRegIndex,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
index 63766df4d2be..e4520d8ccb1e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
@@ -15,13 +15,12 @@
using namespace llvm;
TargetSubtargetInfo::TargetSubtargetInfo(
- const Triple &TT, StringRef CPU, StringRef FS,
+ const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS,
ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetSubTypeKV> PD,
- const MCWriteProcResEntry *WPR,
- const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA,
- const InstrStage *IS, const unsigned *OC, const unsigned *FP)
- : MCSubtargetInfo(TT, CPU, FS, PF, PD, WPR, WL, RA, IS, OC, FP) {
-}
+ const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL,
+ const MCReadAdvanceEntry *RA, const InstrStage *IS, const unsigned *OC,
+ const unsigned *FP)
+ : MCSubtargetInfo(TT, CPU, TuneCPU, FS, PF, PD, WPR, WL, RA, IS, OC, FP) {}
TargetSubtargetInfo::~TargetSubtargetInfo() = default;
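// Sketch of the intended use (assuming the usual driver spellings): with
//   clang --target=x86_64-linux-gnu -march=x86-64 -mtune=skylake ...
// CPU selects the available features while TuneCPU picks the scheduling
// model, and both are forwarded to MCSubtargetInfo here.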
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 615ff4b8789c..ecee4aed7f88 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -111,37 +111,35 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
  // A map from virtual registers to physical registers which are likely
  // coalescing targets due to copies from physical registers to virtual
  // registers. e.g. v1024 = move r0.
- DenseMap<unsigned, unsigned> SrcRegMap;
+ DenseMap<Register, Register> SrcRegMap;
  // A map from virtual registers to physical registers which are likely
  // coalescing targets due to copies to physical registers from virtual
  // registers. e.g. r1 = move v1024.
- DenseMap<unsigned, unsigned> DstRegMap;
+ DenseMap<Register, Register> DstRegMap;
- bool isRevCopyChain(unsigned FromReg, unsigned ToReg, int Maxlen);
+ bool isRevCopyChain(Register FromReg, Register ToReg, int Maxlen);
- bool noUseAfterLastDef(unsigned Reg, unsigned Dist, unsigned &LastDef);
+ bool noUseAfterLastDef(Register Reg, unsigned Dist, unsigned &LastDef);
- bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
+ bool isProfitableToCommute(Register RegA, Register RegB, Register RegC,
MachineInstr *MI, unsigned Dist);
bool commuteInstruction(MachineInstr *MI, unsigned DstIdx,
unsigned RegBIdx, unsigned RegCIdx, unsigned Dist);
- bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB);
+ bool isProfitableToConv3Addr(Register RegA, Register RegB);
bool convertInstTo3Addr(MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- unsigned RegA, unsigned RegB, unsigned Dist);
+ MachineBasicBlock::iterator &nmi, Register RegA,
+ Register RegB, unsigned Dist);
- bool isDefTooClose(unsigned Reg, unsigned Dist, MachineInstr *MI);
+ bool isDefTooClose(Register Reg, unsigned Dist, MachineInstr *MI);
bool rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- unsigned Reg);
+ MachineBasicBlock::iterator &nmi, Register Reg);
bool rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- unsigned Reg);
+ MachineBasicBlock::iterator &nmi, Register Reg);
bool tryInstructionTransform(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
@@ -153,7 +151,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
unsigned BaseOpIdx,
bool BaseOpKilled,
unsigned Dist);
- void scanUses(unsigned DstReg);
+ void scanUses(Register DstReg);
void processCopy(MachineInstr *MI);
@@ -199,10 +197,10 @@ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(TwoAddressInstructionPass, DEBUG_TYPE,
"Two-Address instruction pass", false, false)
-static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, LiveIntervals *LIS);
+static bool isPlainlyKilled(MachineInstr *MI, Register Reg, LiveIntervals *LIS);
/// Return the MachineInstr* if it is the single def of Reg in the current BB.
-static MachineInstr *getSingleDef(unsigned Reg, MachineBasicBlock *BB,
+static MachineInstr *getSingleDef(Register Reg, MachineBasicBlock *BB,
const MachineRegisterInfo *MRI) {
MachineInstr *Ret = nullptr;
for (MachineInstr &DefMI : MRI->def_instructions(Reg)) {
@@ -223,9 +221,9 @@ static MachineInstr *getSingleDef(unsigned Reg, MachineBasicBlock *BB,
/// %Tmp2 = copy %ToReg;
/// MaxLen specifies the maximum length of the copy chain the function
/// can walk through.
-bool TwoAddressInstructionPass::isRevCopyChain(unsigned FromReg, unsigned ToReg,
+bool TwoAddressInstructionPass::isRevCopyChain(Register FromReg, Register ToReg,
int Maxlen) {
- unsigned TmpReg = FromReg;
+ Register TmpReg = FromReg;
for (int i = 0; i < Maxlen; i++) {
MachineInstr *Def = getSingleDef(TmpReg, MBB, MRI);
if (!Def || !Def->isCopy())
@@ -243,7 +241,7 @@ bool TwoAddressInstructionPass::isRevCopyChain(unsigned FromReg, unsigned ToReg,
/// in the MBB that defines the specified register and the two-address
/// instruction which is being processed. It also returns the last def location
/// by reference.
-bool TwoAddressInstructionPass::noUseAfterLastDef(unsigned Reg, unsigned Dist,
+bool TwoAddressInstructionPass::noUseAfterLastDef(Register Reg, unsigned Dist,
unsigned &LastDef) {
LastDef = 0;
unsigned LastUse = Dist;
@@ -267,8 +265,8 @@ bool TwoAddressInstructionPass::noUseAfterLastDef(unsigned Reg, unsigned Dist,
/// instruction. It also returns the source and destination registers and
/// whether they are physical registers by reference.
static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
- unsigned &SrcReg, unsigned &DstReg,
- bool &IsSrcPhys, bool &IsDstPhys) {
+ Register &SrcReg, Register &DstReg, bool &IsSrcPhys,
+ bool &IsDstPhys) {
SrcReg = 0;
DstReg = 0;
if (MI.isCopy()) {
@@ -277,19 +275,20 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
} else if (MI.isInsertSubreg() || MI.isSubregToReg()) {
DstReg = MI.getOperand(0).getReg();
SrcReg = MI.getOperand(2).getReg();
- } else
+ } else {
return false;
+ }
- IsSrcPhys = Register::isPhysicalRegister(SrcReg);
- IsDstPhys = Register::isPhysicalRegister(DstReg);
+ IsSrcPhys = SrcReg.isPhysical();
+ IsDstPhys = DstReg.isPhysical();
return true;
}
/// Test if the given register value, which is used by the
/// given instruction, is killed by the given instruction.
-static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg,
+static bool isPlainlyKilled(MachineInstr *MI, Register Reg,
LiveIntervals *LIS) {
- if (LIS && Register::isVirtualRegister(Reg) && !LIS->isNotInMIMap(*MI)) {
+ if (LIS && Reg.isVirtual() && !LIS->isNotInMIMap(*MI)) {
// FIXME: Sometimes tryInstructionTransform() will add instructions and
// test whether they can be folded before keeping them. In this case it
// sets a kill before recursively calling tryInstructionTransform() again.
@@ -328,20 +327,17 @@ static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg,
///
/// If allowFalsePositives is true then likely kills are treated as kills even
/// if it can't be proven that they are kills.
-static bool isKilled(MachineInstr &MI, unsigned Reg,
- const MachineRegisterInfo *MRI,
- const TargetInstrInfo *TII,
- LiveIntervals *LIS,
- bool allowFalsePositives) {
+static bool isKilled(MachineInstr &MI, Register Reg,
+ const MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
+ LiveIntervals *LIS, bool allowFalsePositives) {
MachineInstr *DefMI = &MI;
while (true) {
// All uses of physical registers are likely to be kills.
- if (Register::isPhysicalRegister(Reg) &&
- (allowFalsePositives || MRI->hasOneUse(Reg)))
+ if (Reg.isPhysical() && (allowFalsePositives || MRI->hasOneUse(Reg)))
return true;
if (!isPlainlyKilled(DefMI, Reg, LIS))
return false;
- if (Register::isPhysicalRegister(Reg))
+ if (Reg.isPhysical())
return true;
MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg);
// If there are multiple defs, we can't do a simple analysis, so just
@@ -350,7 +346,7 @@ static bool isKilled(MachineInstr &MI, unsigned Reg,
return true;
DefMI = Begin->getParent();
bool IsSrcPhys, IsDstPhys;
- unsigned SrcReg, DstReg;
+ Register SrcReg, DstReg;
// If the def is something other than a copy, then it isn't going to
// be coalesced, so follow the kill flag.
if (!isCopyToReg(*DefMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
@@ -361,7 +357,7 @@ static bool isKilled(MachineInstr &MI, unsigned Reg,
/// Return true if the specified MI uses the specified register as a two-address
/// use. If so, return the destination register by reference.
-static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) {
+static bool isTwoAddrUse(MachineInstr &MI, Register Reg, Register &DstReg) {
for (unsigned i = 0, NumOps = MI.getNumOperands(); i != NumOps; ++i) {
const MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
@@ -377,19 +373,17 @@ static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) {
/// Given a register, if has a single in-basic block use, return the use
/// instruction if it's a copy or a two-address use.
-static
-MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB,
- MachineRegisterInfo *MRI,
- const TargetInstrInfo *TII,
- bool &IsCopy,
- unsigned &DstReg, bool &IsDstPhys) {
+static MachineInstr *
+findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB,
+ MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
+ bool &IsCopy, Register &DstReg, bool &IsDstPhys) {
if (!MRI->hasOneNonDBGUse(Reg))
// None or more than one use.
return nullptr;
MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(Reg);
if (UseMI.getParent() != MBB)
return nullptr;
- unsigned SrcReg;
+ Register SrcReg;
bool IsSrcPhys;
if (isCopyToReg(UseMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) {
IsCopy = true;
@@ -397,7 +391,7 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB,
}
IsDstPhys = false;
if (isTwoAddrUse(UseMI, Reg, DstReg)) {
- IsDstPhys = Register::isPhysicalRegister(DstReg);
+ IsDstPhys = DstReg.isPhysical();
return &UseMI;
}
return nullptr;
@@ -405,22 +399,22 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB,
/// Return the physical register the specified virtual register might be mapped
/// to.
-static unsigned
-getMappedReg(unsigned Reg, DenseMap<unsigned, unsigned> &RegMap) {
- while (Register::isVirtualRegister(Reg)) {
- DenseMap<unsigned, unsigned>::iterator SI = RegMap.find(Reg);
+static MCRegister getMappedReg(Register Reg,
+ DenseMap<Register, Register> &RegMap) {
+ while (Reg.isVirtual()) {
+ DenseMap<Register, Register>::iterator SI = RegMap.find(Reg);
if (SI == RegMap.end())
return 0;
Reg = SI->second;
}
- if (Register::isPhysicalRegister(Reg))
+ if (Reg.isPhysical())
return Reg;
return 0;
}
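// E.g. (a sketch): with SrcRegMap = { %1 -> %2, %2 -> $r0 },
// getMappedReg(%1, SrcRegMap) follows %1 -> %2 -> $r0 and returns $r0;
// a chain ending in an unmapped virtual register yields 0 instead.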
/// Return true if the two registers are equal or aliased.
-static bool
-regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) {
+static bool regsAreCompatible(Register RegA, Register RegB,
+ const TargetRegisterInfo *TRI) {
if (RegA == RegB)
return true;
if (!RegA || !RegB)
@@ -429,7 +423,7 @@ regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) {
}
// Returns true if Reg is equal or aliased to at least one register in Set.
-static bool regOverlapsSet(const SmallVectorImpl<unsigned> &Set, unsigned Reg,
+static bool regOverlapsSet(const SmallVectorImpl<Register> &Set, Register Reg,
const TargetRegisterInfo *TRI) {
for (unsigned R : Set)
if (TRI->regsOverlap(R, Reg))
@@ -440,10 +434,11 @@ static bool regOverlapsSet(const SmallVectorImpl<unsigned> &Set, unsigned Reg,
/// Return true if it's potentially profitable to commute the two-address
/// instruction that's being processed.
-bool
-TwoAddressInstructionPass::
-isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
- MachineInstr *MI, unsigned Dist) {
+bool TwoAddressInstructionPass::isProfitableToCommute(Register RegA,
+ Register RegB,
+ Register RegC,
+ MachineInstr *MI,
+ unsigned Dist) {
if (OptLevel == CodeGenOpt::None)
return false;
@@ -465,7 +460,7 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
// insert => %reg1030 = COPY %reg1029
// %reg1030 = ADD8rr killed %reg1029, killed %reg1028, implicit dead %eflags
- if (!isPlainlyKilled(MI, regC, LIS))
+ if (!isPlainlyKilled(MI, RegC, LIS))
return false;
// Ok, we have something like:
@@ -478,10 +473,10 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
// %reg1026 = ADD %reg1024, %reg1025
// r0 = MOV %reg1026
// Commute the ADD to hopefully eliminate an otherwise unavoidable copy.
- unsigned ToRegA = getMappedReg(regA, DstRegMap);
+ MCRegister ToRegA = getMappedReg(RegA, DstRegMap);
if (ToRegA) {
- unsigned FromRegB = getMappedReg(regB, SrcRegMap);
- unsigned FromRegC = getMappedReg(regC, SrcRegMap);
+ MCRegister FromRegB = getMappedReg(RegB, SrcRegMap);
+ MCRegister FromRegC = getMappedReg(RegC, SrcRegMap);
bool CompB = FromRegB && regsAreCompatible(FromRegB, ToRegA, TRI);
bool CompC = FromRegC && regsAreCompatible(FromRegC, ToRegA, TRI);
@@ -499,16 +494,16 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
return false;
}
- // If there is a use of regC between its last def (could be livein) and this
+ // If there is a use of RegC between its last def (could be livein) and this
// instruction, then bail.
unsigned LastDefC = 0;
- if (!noUseAfterLastDef(regC, Dist, LastDefC))
+ if (!noUseAfterLastDef(RegC, Dist, LastDefC))
return false;
- // If there is a use of regB between its last def (could be livein) and this
+ // If there is a use of RegB between its last def (could be livein) and this
// instruction, then go ahead and make this transformation.
unsigned LastDefB = 0;
- if (!noUseAfterLastDef(regB, Dist, LastDefB))
+ if (!noUseAfterLastDef(RegB, Dist, LastDefB))
return true;
// Look for situation like this:
@@ -526,14 +521,14 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
// To more generally minimize register copies, ideally the logic of two addr
// instruction pass should be integrated with register allocation pass where
// interference graph is available.
- if (isRevCopyChain(regC, regA, MaxDataFlowEdge))
+ if (isRevCopyChain(RegC, RegA, MaxDataFlowEdge))
return true;
- if (isRevCopyChain(regB, regA, MaxDataFlowEdge))
+ if (isRevCopyChain(RegB, RegA, MaxDataFlowEdge))
return false;
// Since there are no intervening uses for both registers, then commute
- // if the def of regC is closer. Its live interval is shorter.
+ // if the def of RegC is closer. Its live interval is shorter.
return LastDefB && LastDefC && LastDefC > LastDefB;
}
@@ -559,7 +554,7 @@ bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI,
"instruction unless it was requested.");
// Update source register map.
- unsigned FromRegC = getMappedReg(RegC, SrcRegMap);
+ MCRegister FromRegC = getMappedReg(RegC, SrcRegMap);
if (FromRegC) {
Register RegA = MI->getOperand(DstIdx).getReg();
SrcRegMap[RegA] = FromRegC;
@@ -570,28 +565,26 @@ bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI,
/// Return true if it is profitable to convert the given 2-address instruction
/// to a 3-address one.
-bool
-TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){
+bool TwoAddressInstructionPass::isProfitableToConv3Addr(Register RegA,
+ Register RegB) {
// Look for situations like this:
// %reg1024 = MOV r1
// %reg1025 = MOV r0
// %reg1026 = ADD %reg1024, %reg1025
// r2 = MOV %reg1026
// Turn ADD into a 3-address instruction to avoid a copy.
- unsigned FromRegB = getMappedReg(RegB, SrcRegMap);
+ MCRegister FromRegB = getMappedReg(RegB, SrcRegMap);
if (!FromRegB)
return false;
- unsigned ToRegA = getMappedReg(RegA, DstRegMap);
+ MCRegister ToRegA = getMappedReg(RegA, DstRegMap);
return (ToRegA && !regsAreCompatible(FromRegB, ToRegA, TRI));
}
/// Convert the specified two-address instruction into a three address one.
/// Return true if this transformation was successful.
-bool
-TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- unsigned RegA, unsigned RegB,
- unsigned Dist) {
+bool TwoAddressInstructionPass::convertInstTo3Addr(
+ MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi,
+ Register RegA, Register RegB, unsigned Dist) {
// FIXME: Why does convertToThreeAddress() need an iterator reference?
MachineFunction::iterator MFI = MBB->getIterator();
MachineInstr *NewMI = TII->convertToThreeAddress(MFI, *mi, LV);
@@ -606,6 +599,24 @@ TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi,
if (LIS)
LIS->ReplaceMachineInstrInMaps(*mi, *NewMI);
+ // If the old instruction is debug value tracked, an update is required.
+ if (auto OldInstrNum = mi->peekDebugInstrNum()) {
+ // Sanity check.
+ assert(mi->getNumExplicitDefs() == 1);
+ assert(NewMI->getNumExplicitDefs() == 1);
+
+ // Find the old and new def location.
+ auto OldIt = mi->defs().begin();
+ auto NewIt = NewMI->defs().begin();
+ unsigned OldIdx = mi->getOperandNo(OldIt);
+ unsigned NewIdx = NewMI->getOperandNo(NewIt);
+
+ // Record that one def has been replaced by the other.
+ unsigned NewInstrNum = NewMI->getDebugInstrNum();
+ MF->makeDebugValueSubstitution(std::make_pair(OldInstrNum, OldIdx),
+ std::make_pair(NewInstrNum, NewIdx));
+ }
+
MBB->erase(mi); // Nuke the old inst.
DistanceMap.insert(std::make_pair(NewMI, Dist));
@@ -620,13 +631,12 @@ TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi,
/// Scan forward recursively for only uses, update maps if the use is a copy or
/// a two-address instruction.
-void
-TwoAddressInstructionPass::scanUses(unsigned DstReg) {
- SmallVector<unsigned, 4> VirtRegPairs;
+void TwoAddressInstructionPass::scanUses(Register DstReg) {
+ SmallVector<Register, 4> VirtRegPairs;
bool IsDstPhys;
bool IsCopy = false;
- unsigned NewReg = 0;
- unsigned Reg = DstReg;
+ Register NewReg;
+ Register Reg = DstReg;
while (MachineInstr *UseMI = findOnlyInterestingUse(Reg, MBB, MRI, TII,IsCopy,
NewReg, IsDstPhys)) {
if (IsCopy && !Processed.insert(UseMI).second)
@@ -682,13 +692,13 @@ void TwoAddressInstructionPass::processCopy(MachineInstr *MI) {
return;
bool IsSrcPhys, IsDstPhys;
- unsigned SrcReg, DstReg;
+ Register SrcReg, DstReg;
if (!isCopyToReg(*MI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
return;
- if (IsDstPhys && !IsSrcPhys)
+ if (IsDstPhys && !IsSrcPhys) {
DstRegMap.insert(std::make_pair(SrcReg, DstReg));
- else if (!IsDstPhys && IsSrcPhys) {
+ } else if (!IsDstPhys && IsSrcPhys) {
bool isNew = SrcRegMap.insert(std::make_pair(DstReg, SrcReg)).second;
if (!isNew)
assert(SrcRegMap[DstReg] == SrcReg &&
@@ -703,10 +713,9 @@ void TwoAddressInstructionPass::processCopy(MachineInstr *MI) {
/// If there is one more local instruction that reads 'Reg' and it kills 'Reg',
/// consider moving the instruction below the kill instruction in order to
/// eliminate the need for the copy.
-bool TwoAddressInstructionPass::
-rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- unsigned Reg) {
+bool TwoAddressInstructionPass::rescheduleMIBelowKill(
+ MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi,
+ Register Reg) {
// Bail immediately if we don't have LV or LIS available. We use them to find
// kills efficiently.
if (!LV && !LIS)
@@ -743,7 +752,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
// Don't move past calls, etc.
return false;
- unsigned DstReg;
+ Register DstReg;
if (isTwoAddrUse(*KillMI, Reg, DstReg))
return false;
@@ -755,9 +764,9 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
// FIXME: Needs more sophisticated heuristics.
return false;
- SmallVector<unsigned, 2> Uses;
- SmallVector<unsigned, 2> Kills;
- SmallVector<unsigned, 2> Defs;
+ SmallVector<Register, 2> Uses;
+ SmallVector<Register, 2> Kills;
+ SmallVector<Register, 2> Defs;
for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg())
continue;
@@ -872,7 +881,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
/// Return true if the re-scheduling will put the given instruction too close
/// to the defs of its register dependencies.
-bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist,
+bool TwoAddressInstructionPass::isDefTooClose(Register Reg, unsigned Dist,
MachineInstr *MI) {
for (MachineInstr &DefMI : MRI->def_instructions(Reg)) {
if (DefMI.getParent() != MBB || DefMI.isCopy() || DefMI.isCopyLike())
@@ -893,10 +902,9 @@ bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist,
/// If there is one more local instruction that reads 'Reg' and it kills 'Reg',
/// consider moving the kill instruction above the current two-address
/// instruction in order to eliminate the need for the copy.
-bool TwoAddressInstructionPass::
-rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
- MachineBasicBlock::iterator &nmi,
- unsigned Reg) {
+bool TwoAddressInstructionPass::rescheduleKillAboveMI(
+ MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi,
+ Register Reg) {
// Bail immediately if we don't have LV or LIS available. We use them to find
// kills efficiently.
if (!LV && !LIS)
@@ -928,7 +936,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
// Don't mess with copies; they may be coalesced later.
return false;
- unsigned DstReg;
+ Register DstReg;
if (isTwoAddrUse(*KillMI, Reg, DstReg))
return false;
@@ -936,10 +944,10 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
if (!KillMI->isSafeToMove(AA, SeenStore))
return false;
- SmallSet<unsigned, 2> Uses;
- SmallSet<unsigned, 2> Kills;
- SmallSet<unsigned, 2> Defs;
- SmallSet<unsigned, 2> LiveDefs;
+ SmallVector<Register, 2> Uses;
+ SmallVector<Register, 2> Kills;
+ SmallVector<Register, 2> Defs;
+ SmallVector<Register, 2> LiveDefs;
for (const MachineOperand &MO : KillMI->operands()) {
if (!MO.isReg())
continue;
@@ -952,13 +960,13 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
bool isKill = MO.isKill() || (LIS && isPlainlyKilled(KillMI, MOReg, LIS));
if (MOReg == Reg && !isKill)
return false;
- Uses.insert(MOReg);
+ Uses.push_back(MOReg);
if (isKill && MOReg != Reg)
- Kills.insert(MOReg);
- } else if (Register::isPhysicalRegister(MOReg)) {
- Defs.insert(MOReg);
+ Kills.push_back(MOReg);
+ } else if (MOReg.isPhysical()) {
+ Defs.push_back(MOReg);
if (!MO.isDead())
- LiveDefs.insert(MOReg);
+ LiveDefs.push_back(MOReg);
}
}
@@ -976,7 +984,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
OtherMI.isBranch() || OtherMI.isTerminator())
// Don't move past calls, etc.
return false;
- SmallVector<unsigned, 2> OtherDefs;
+ SmallVector<Register, 2> OtherDefs;
for (const MachineOperand &MO : OtherMI.operands()) {
if (!MO.isReg())
continue;
@@ -984,11 +992,11 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
if (!MOReg)
continue;
if (MO.isUse()) {
- if (Defs.count(MOReg))
+ if (regOverlapsSet(Defs, MOReg, TRI))
// Moving KillMI can clobber the physical register if the def has
// not been seen.
return false;
- if (Kills.count(MOReg))
+ if (regOverlapsSet(Kills, MOReg, TRI))
// Don't want to extend other live ranges and update kills.
return false;
if (&OtherMI != MI && MOReg == Reg &&
@@ -1001,13 +1009,13 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
}
for (unsigned i = 0, e = OtherDefs.size(); i != e; ++i) {
- unsigned MOReg = OtherDefs[i];
- if (Uses.count(MOReg))
+ Register MOReg = OtherDefs[i];
+ if (regOverlapsSet(Uses, MOReg, TRI))
return false;
- if (Register::isPhysicalRegister(MOReg) && LiveDefs.count(MOReg))
+ if (MOReg.isPhysical() && regOverlapsSet(LiveDefs, MOReg, TRI))
return false;
// Physical register def is seen.
- Defs.erase(MOReg);
+ llvm::erase_value(Defs, MOReg);
}
}
@@ -1125,11 +1133,10 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
Register regA = MI.getOperand(DstIdx).getReg();
Register regB = MI.getOperand(SrcIdx).getReg();
- assert(Register::isVirtualRegister(regB) &&
- "cannot make instruction into two-address form");
+ assert(regB.isVirtual() && "cannot make instruction into two-address form");
bool regBKilled = isKilled(MI, regB, MRI, TII, LIS, true);
- if (Register::isVirtualRegister(regA))
+ if (regA.isVirtual())
scanUses(regA);
bool Commuted = tryInstructionCommute(&MI, DstIdx, SrcIdx, regBKilled, Dist);
@@ -1245,7 +1252,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
if (LV) {
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
- if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) {
+ if (MO.isReg() && MO.getReg().isVirtual()) {
if (MO.isUse()) {
if (MO.isKill()) {
if (NewMIs[0]->killsRegister(MO.getReg()))
@@ -1330,7 +1337,7 @@ collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
// Deal with undef uses immediately - simply rewrite the src operand.
if (SrcMO.isUndef() && !DstMO.getSubReg()) {
// Constrain the DstReg register class if required.
- if (Register::isVirtualRegister(DstReg))
+ if (DstReg.isVirtual())
if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx,
TRI, *MF))
MRI->constrainRegClass(DstReg, RC);
@@ -1360,7 +1367,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
bool AllUsesCopied = true;
unsigned LastCopiedReg = 0;
SlotIndex LastCopyIdx;
- unsigned RegB = 0;
+ Register RegB = 0;
unsigned SubRegB = 0;
for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
unsigned SrcIdx = TiedPairs[tpi].first;
@@ -1383,8 +1390,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
}
LastCopiedReg = RegA;
- assert(Register::isVirtualRegister(RegB) &&
- "cannot make instruction into two-address form");
+ assert(RegB.isVirtual() && "cannot make instruction into two-address form");
#ifndef NDEBUG
// First, verify that we don't have a use of "a" in the instruction
@@ -1404,7 +1410,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
MIB.addReg(RegB, 0, SubRegB);
const TargetRegisterClass *RC = MRI->getRegClass(RegB);
if (SubRegB) {
- if (Register::isVirtualRegister(RegA)) {
+ if (RegA.isVirtual()) {
assert(TRI->getMatchingSuperRegClass(RC, MRI->getRegClass(RegA),
SubRegB) &&
"tied subregister must be a truncation");
@@ -1425,7 +1431,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
if (LIS) {
LastCopyIdx = LIS->InsertMachineInstrInMaps(*PrevMI).getRegSlot();
- if (Register::isVirtualRegister(RegA)) {
+ if (RegA.isVirtual()) {
LiveInterval &LI = LIS->getInterval(RegA);
VNInfo *VNI = LI.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator());
SlotIndex endIdx =
@@ -1445,7 +1451,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
}
// Make sure regA is a legal regclass for the SrcIdx operand.
- if (Register::isVirtualRegister(RegA) && Register::isVirtualRegister(RegB))
+ if (RegA.isVirtual() && RegB.isVirtual())
MRI->constrainRegClass(RegA, RC);
MO.setReg(RegA);
// The getMatchingSuper asserts guarantee that the register class projected
@@ -1649,7 +1655,7 @@ void TwoAddressInstructionPass::
eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
Register DstReg = MI.getOperand(0).getReg();
- if (MI.getOperand(0).getSubReg() || Register::isPhysicalRegister(DstReg) ||
+ if (MI.getOperand(0).getSubReg() || DstReg.isPhysical() ||
!(MI.getNumOperands() & 1)) {
LLVM_DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << MI);
llvm_unreachable(nullptr);
@@ -1699,7 +1705,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
DefEmitted = true;
// Update LiveVariables' kill info.
- if (LV && isKill && !Register::isPhysicalRegister(SrcReg))
+ if (LV && isKill && !SrcReg.isPhysical())
LV->replaceKillInstruction(SrcReg, MI, *CopyMI);
LLVM_DEBUG(dbgs() << "Inserted: " << *CopyMI);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp
index 807babdcaf25..a42095d8718a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp
@@ -134,8 +134,9 @@ public:
Ctx(C), OrigTy(Ty), PromotedWidth(Width), Visited(visited),
Sources(sources), Sinks(sinks), SafeWrap(wrap) {
ExtTy = IntegerType::get(Ctx, PromotedWidth);
- assert(OrigTy->getPrimitiveSizeInBits() < ExtTy->getPrimitiveSizeInBits()
- && "Original type not smaller than extended type");
+ assert(OrigTy->getPrimitiveSizeInBits().getFixedSize() <
+ ExtTy->getPrimitiveSizeInBits().getFixedSize() &&
+ "Original type not smaller than extended type");
}
void Mutate();
@@ -809,7 +810,7 @@ bool TypePromotion::isLegalToPromote(Value *V) {
bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) {
Type *OrigTy = V->getType();
- TypeSize = OrigTy->getPrimitiveSizeInBits();
+ TypeSize = OrigTy->getPrimitiveSizeInBits().getFixedSize();
SafeToPromote.clear();
SafeWrap.clear();
@@ -980,15 +981,14 @@ bool TypePromotion::runOnFunction(Function &F) {
if (TLI->getTypeAction(ICmp->getContext(), SrcVT) !=
TargetLowering::TypePromoteInteger)
break;
-
EVT PromotedVT = TLI->getTypeToTransformTo(ICmp->getContext(), SrcVT);
- if (RegisterBitWidth < PromotedVT.getSizeInBits()) {
+ if (RegisterBitWidth < PromotedVT.getFixedSizeInBits()) {
LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target register "
<< "for promoted type\n");
break;
}
- MadeChange |= TryToPromote(I, PromotedVT.getSizeInBits());
+ MadeChange |= TryToPromote(I, PromotedVT.getFixedSizeInBits());
break;
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp
index 66bcdd9b2c4a..978357d8f539 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp
@@ -49,8 +49,7 @@ EVT EVT::getExtendedVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements,
EVT EVT::getExtendedVectorVT(LLVMContext &Context, EVT VT, ElementCount EC) {
EVT ResultVT;
- ResultVT.LLVMTy =
- VectorType::get(VT.getTypeForEVT(Context), {EC.Min, EC.Scalable});
+ ResultVT.LLVMTy = VectorType::get(VT.getTypeForEVT(Context), EC);
assert(ResultVT.isExtended() && "Type is not extended!");
return ResultVT;
}
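// A small usage sketch of the ElementCount overload:
//   EVT F = EVT::getExtendedVectorVT(Ctx, VT, ElementCount::getFixed(8));
//   EVT S = EVT::getExtendedVectorVT(Ctx, VT, ElementCount::getScalable(8));
// where S describes a <vscale x 8 x ...> scalable vector.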
@@ -123,13 +122,13 @@ EVT EVT::getExtendedVectorElementType() const {
unsigned EVT::getExtendedVectorNumElements() const {
assert(isExtended() && "Type is not extended!");
ElementCount EC = cast<VectorType>(LLVMTy)->getElementCount();
- if (EC.Scalable) {
+ if (EC.isScalable()) {
WithColor::warning()
<< "The code that requested the fixed number of elements has made the "
"assumption that this vector is not scalable. This assumption was "
"not correct, and this may lead to broken code\n";
}
- return EC.Min;
+ return EC.getKnownMinValue();
}
ElementCount EVT::getExtendedVectorElementCount() const {
@@ -151,23 +150,25 @@ std::string EVT::getEVTString() const {
switch (V.SimpleTy) {
default:
if (isVector())
- return (isScalableVector() ? "nxv" : "v")
- + utostr(getVectorElementCount().Min)
- + getVectorElementType().getEVTString();
+ return (isScalableVector() ? "nxv" : "v") +
+ utostr(getVectorElementCount().getKnownMinValue()) +
+ getVectorElementType().getEVTString();
if (isInteger())
return "i" + utostr(getSizeInBits());
if (isFloatingPoint())
return "f" + utostr(getSizeInBits());
llvm_unreachable("Invalid EVT!");
- case MVT::bf16: return "bf16";
- case MVT::ppcf128: return "ppcf128";
- case MVT::isVoid: return "isVoid";
- case MVT::Other: return "ch";
- case MVT::Glue: return "glue";
- case MVT::x86mmx: return "x86mmx";
- case MVT::Metadata:return "Metadata";
- case MVT::Untyped: return "Untyped";
- case MVT::exnref : return "exnref";
+ case MVT::bf16: return "bf16";
+ case MVT::ppcf128: return "ppcf128";
+ case MVT::isVoid: return "isVoid";
+ case MVT::Other: return "ch";
+ case MVT::Glue: return "glue";
+ case MVT::x86mmx: return "x86mmx";
+ case MVT::x86amx: return "x86amx";
+ case MVT::Metadata: return "Metadata";
+ case MVT::Untyped: return "Untyped";
+ case MVT::funcref: return "funcref";
+ case MVT::externref: return "externref";
}
}
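// For example (a sketch of the default case above): a fixed <4 x i32> EVT
// prints as "v4i32", while the scalable equivalent prints as "nxv4i32".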
@@ -194,6 +195,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::f128: return Type::getFP128Ty(Context);
case MVT::ppcf128: return Type::getPPC_FP128Ty(Context);
case MVT::x86mmx: return Type::getX86_MMXTy(Context);
+ case MVT::x86amx: return Type::getX86_AMXTy(Context);
case MVT::v1i1:
return FixedVectorType::get(Type::getInt1Ty(Context), 1);
case MVT::v2i1:
@@ -292,6 +294,12 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
return FixedVectorType::get(Type::getInt64Ty(Context), 16);
case MVT::v32i64:
return FixedVectorType::get(Type::getInt64Ty(Context), 32);
+ case MVT::v64i64:
+ return FixedVectorType::get(Type::getInt64Ty(Context), 64);
+ case MVT::v128i64:
+ return FixedVectorType::get(Type::getInt64Ty(Context), 128);
+ case MVT::v256i64:
+ return FixedVectorType::get(Type::getInt64Ty(Context), 256);
case MVT::v1i128:
return FixedVectorType::get(Type::getInt128Ty(Context), 1);
case MVT::v2f16:
@@ -307,9 +315,9 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::v32f16:
return FixedVectorType::get(Type::getHalfTy(Context), 32);
case MVT::v64f16:
- return FixedVectorType::get(Type::getBFloatTy(Context), 64);
+ return FixedVectorType::get(Type::getHalfTy(Context), 64);
case MVT::v128f16:
- return FixedVectorType::get(Type::getBFloatTy(Context), 128);
+ return FixedVectorType::get(Type::getHalfTy(Context), 128);
case MVT::v2bf16:
return FixedVectorType::get(Type::getBFloatTy(Context), 2);
case MVT::v3bf16:
@@ -366,6 +374,12 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
return FixedVectorType::get(Type::getDoubleTy(Context), 16);
case MVT::v32f64:
return FixedVectorType::get(Type::getDoubleTy(Context), 32);
+ case MVT::v64f64:
+ return FixedVectorType::get(Type::getDoubleTy(Context), 64);
+ case MVT::v128f64:
+ return FixedVectorType::get(Type::getDoubleTy(Context), 128);
+ case MVT::v256f64:
+ return FixedVectorType::get(Type::getDoubleTy(Context), 256);
case MVT::nxv1i1:
return ScalableVectorType::get(Type::getInt1Ty(Context), 1);
case MVT::nxv2i1:
@@ -488,6 +502,7 @@ MVT MVT::getVT(Type *Ty, bool HandleUnknown){
case Type::DoubleTyID: return MVT(MVT::f64);
case Type::X86_FP80TyID: return MVT(MVT::f80);
case Type::X86_MMXTyID: return MVT(MVT::x86mmx);
+ case Type::X86_AMXTyID: return MVT(MVT::x86amx);
case Type::FP128TyID: return MVT(MVT::f128);
case Type::PPC_FP128TyID: return MVT(MVT::ppcf128);
case Type::PointerTyID: return MVT(MVT::iPTR);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp
index 2c83f13b651b..5e0ff9d9092c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -68,6 +68,7 @@ bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
Virt2PhysMap.clear();
Virt2StackSlotMap.clear();
Virt2SplitMap.clear();
+ Virt2ShapeMap.clear();
grow();
return false;
@@ -104,7 +105,7 @@ bool VirtRegMap::hasPreferredPhys(Register VirtReg) {
return false;
if (Hint.isVirtual())
Hint = getPhys(Hint);
- return getPhys(VirtReg) == Hint;
+ return Register(getPhys(VirtReg)) == Hint;
}
bool VirtRegMap::hasKnownPreference(Register VirtReg) {
@@ -187,7 +188,7 @@ class VirtRegRewriter : public MachineFunctionPass {
void addLiveInsForSubRanges(const LiveInterval &LI, Register PhysReg) const;
void handleIdentityCopy(MachineInstr &MI) const;
void expandCopyBundle(MachineInstr &MI) const;
- bool subRegLiveThrough(const MachineInstr &MI, Register SuperPhysReg) const;
+ bool subRegLiveThrough(const MachineInstr &MI, MCRegister SuperPhysReg) const;
public:
static char ID;
@@ -400,18 +401,18 @@ void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const {
/// after processing the last in the bundle. Does not update LiveIntervals
/// which we shouldn't need for this instruction anymore.
void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const {
- if (!MI.isCopy())
+ if (!MI.isCopy() && !MI.isKill())
return;
if (MI.isBundledWithPred() && !MI.isBundledWithSucc()) {
SmallVector<MachineInstr *, 2> MIs({&MI});
- // Only do this when the complete bundle is made out of COPYs.
+ // Only do this when the complete bundle is made out of COPYs and KILLs.
MachineBasicBlock &MBB = *MI.getParent();
for (MachineBasicBlock::reverse_instr_iterator I =
std::next(MI.getReverseIterator()), E = MBB.instr_rend();
I != E && I->isBundledWithSucc(); ++I) {
- if (!I->isCopy())
+ if (!I->isCopy() && !I->isKill())
return;
MIs.push_back(&*I);
}
@@ -452,7 +453,7 @@ void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const {
// instruction, the bundle will have been completely undone.
if (BundledMI != BundleStart) {
BundledMI->removeFromBundle();
- MBB.insert(FirstMI, BundledMI);
+ MBB.insert(BundleStart, BundledMI);
} else if (BundledMI->isBundledWithSucc()) {
BundledMI->unbundleFromSucc();
BundleStart = &*std::next(BundledMI->getIterator());
@@ -468,7 +469,7 @@ void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const {
/// \pre \p MI defines a subregister of a virtual register that
/// has been assigned to \p SuperPhysReg.
bool VirtRegRewriter::subRegLiveThrough(const MachineInstr &MI,
- Register SuperPhysReg) const {
+ MCRegister SuperPhysReg) const {
SlotIndex MIIndex = LIS->getInstructionIndex(MI);
SlotIndex BeforeMIUses = MIIndex.getBaseIndex();
SlotIndex AfterMIDefs = MIIndex.getBoundaryIndex();
@@ -515,7 +516,7 @@ void VirtRegRewriter::rewrite() {
if (!MO.isReg() || !MO.getReg().isVirtual())
continue;
Register VirtReg = MO.getReg();
- Register PhysReg = VRM->getPhys(VirtReg);
+ MCRegister PhysReg = VRM->getPhys(VirtReg);
assert(PhysReg != VirtRegMap::NO_PHYS_REG &&
"Instruction uses unmapped VirtReg");
assert(!MRI->isReserved(PhysReg) && "Reserved register assignment");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
index 44f4fe2ff9b1..53424556682d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
@@ -23,7 +23,7 @@
//
// - After:
// catchpad ...
-// exn = wasm.extract.exception();
+// exn = wasm.catch(WebAssembly::CPP_EXCEPTION);
// The lines below are added only when it's not a single catch (...)
// wasm.landingpad.index(index);
// __wasm_lpad_context.lpad_index = index;
@@ -112,7 +112,7 @@ class WasmEHPrepare : public FunctionPass {
Function *LPadIndexF = nullptr; // wasm.landingpad.index() intrinsic
Function *LSDAF = nullptr; // wasm.lsda() intrinsic
Function *GetExnF = nullptr; // wasm.get.exception() intrinsic
- Function *ExtractExnF = nullptr; // wasm.extract.exception() intrinsic
+ Function *CatchF = nullptr; // wasm.catch() intrinsic
Function *GetSelectorF = nullptr; // wasm.get.ehselector() intrinsic
FunctionCallee CallPersonalityF =
nullptr; // _Unwind_CallPersonality() wrapper
@@ -124,7 +124,6 @@ class WasmEHPrepare : public FunctionPass {
void setupEHPadFunctions(Function &F);
void prepareEHPad(BasicBlock *BB, bool NeedPersonality, bool NeedLSDA = false,
unsigned Index = 0);
- void prepareTerminateCleanupPad(BasicBlock *BB);
public:
static char ID; // Pass identification, replacement for typeid
@@ -169,7 +168,7 @@ static void eraseDeadBBsAndChildren(const Container &BBs, DomTreeUpdater *DTU) {
SmallVector<BasicBlock *, 8> WL(BBs.begin(), BBs.end());
while (!WL.empty()) {
auto *BB = WL.pop_back_val();
- if (pred_begin(BB) != pred_end(BB))
+ if (!pred_empty(BB))
continue;
WL.append(succ_begin(BB), succ_end(BB));
DeleteDeadBlock(BB, DTU);
@@ -205,7 +204,7 @@ bool WasmEHPrepare::prepareThrows(Function &F) {
continue;
Changed = true;
auto *BB = ThrowI->getParent();
- SmallVector<BasicBlock *, 4> Succs(succ_begin(BB), succ_end(BB));
+ SmallVector<BasicBlock *, 4> Succs(successors(BB));
auto &InstList = BB->getInstList();
InstList.erase(std::next(BasicBlock::iterator(ThrowI)), InstList.end());
IRB.SetInsertPoint(BB);
@@ -328,12 +327,9 @@ void WasmEHPrepare::setupEHPadFunctions(Function &F) {
GetExnF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_get_exception);
GetSelectorF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_get_ehselector);
- // wasm.extract.exception() is the same as wasm.get.exception() but it does
- // not take a token argument. This will be lowered down to EXTRACT_EXCEPTION
- // pseudo instruction in instruction selection, which will be expanded using
- // 'br_on_exn' instruction later.
- ExtractExnF =
- Intrinsic::getDeclaration(&M, Intrinsic::wasm_extract_exception);
+  // wasm.catch() will be lowered to the wasm 'catch' instruction during
+  // instruction selection.
+ CatchF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_catch);
// _Unwind_CallPersonality() wrapper function, which calls the personality
CallPersonalityF = M.getOrInsertFunction(
@@ -373,8 +369,13 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedPersonality,
return;
}
- Instruction *ExtractExnCI = IRB.CreateCall(ExtractExnF, {}, "exn");
- GetExnCI->replaceAllUsesWith(ExtractExnCI);
+ // Replace wasm.get.exception intrinsic with wasm.catch intrinsic, which will
+ // be lowered to wasm 'catch' instruction. We do this mainly because
+ // instruction selection cannot handle wasm.get.exception intrinsic's token
+ // argument.
+ Instruction *CatchCI =
+ IRB.CreateCall(CatchF, {IRB.getInt32(WebAssembly::CPP_EXCEPTION)}, "exn");
+ GetExnCI->replaceAllUsesWith(CatchCI);
GetExnCI->eraseFromParent();
// In case it is a catchpad with single catch (...) or a cleanuppad, we don't
@@ -387,7 +388,7 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedPersonality,
}
return;
}
- IRB.SetInsertPoint(ExtractExnCI->getNextNode());
+ IRB.SetInsertPoint(CatchCI->getNextNode());
// This is to create a map of <landingpad EH label, landingpad index> in
// SelectionDAGISel, which is to be used in EHStreamer to emit LSDA tables.
@@ -403,7 +404,7 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedPersonality,
IRB.CreateStore(IRB.CreateCall(LSDAF), LSDAField);
// Pseudocode: _Unwind_CallPersonality(exn);
- CallInst *PersCI = IRB.CreateCall(CallPersonalityF, ExtractExnCI,
+ CallInst *PersCI = IRB.CreateCall(CallPersonalityF, CatchCI,
OperandBundleDef("funclet", CPI));
PersCI->setDoesNotThrow();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp
index 5a25234ba850..96d256ba57a3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -804,13 +804,9 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) {
<< "\' to block \'" << NewBlock->getName()
<< "\'.\n");
- BlocksInFunclet.erase(
- std::remove(BlocksInFunclet.begin(), BlocksInFunclet.end(), OldBlock),
- BlocksInFunclet.end());
+ llvm::erase_value(BlocksInFunclet, OldBlock);
ColorVector &OldColors = BlockColors[OldBlock];
- OldColors.erase(
- std::remove(OldColors.begin(), OldColors.end(), FuncletPadBB),
- OldColors.end());
+ llvm::erase_value(OldColors, FuncletPadBB);
DEBUG_WITH_TYPE("winehprepare-coloring",
dbgs() << " Removed color \'" << FuncletPadBB->getName()
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp
index ab9c0e81ebdc..11d1b309aa64 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/XRayInstrumentation.cpp
@@ -145,20 +145,22 @@ void XRayInstrumentation::prependRetWithPatchableExit(
bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
auto &F = MF.getFunction();
auto InstrAttr = F.getFnAttribute("function-instrument");
- bool AlwaysInstrument = !InstrAttr.hasAttribute(Attribute::None) &&
- InstrAttr.isStringAttribute() &&
+ bool AlwaysInstrument = InstrAttr.isStringAttribute() &&
InstrAttr.getValueAsString() == "xray-always";
+ bool NeverInstrument = InstrAttr.isStringAttribute() &&
+ InstrAttr.getValueAsString() == "xray-never";
+ if (NeverInstrument && !AlwaysInstrument)
+ return false;
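// Attribute sketch: in C/C++ these string attributes typically come from
// __attribute__((xray_always_instrument)) or ((xray_never_instrument)),
// which the frontend lowers to "function-instrument"="xray-always" or
// "xray-never" (assumed mapping).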
auto ThresholdAttr = F.getFnAttribute("xray-instruction-threshold");
auto IgnoreLoopsAttr = F.getFnAttribute("xray-ignore-loops");
unsigned int XRayThreshold = 0;
if (!AlwaysInstrument) {
- if (ThresholdAttr.hasAttribute(Attribute::None) ||
- !ThresholdAttr.isStringAttribute())
+ if (!ThresholdAttr.isStringAttribute())
return false; // XRay threshold attribute not found.
if (ThresholdAttr.getValueAsString().getAsInteger(10, XRayThreshold))
return false; // Invalid value for threshold.
- bool IgnoreLoops = !IgnoreLoopsAttr.hasAttribute(Attribute::None);
+ bool IgnoreLoops = IgnoreLoopsAttr.isValid();
// Count the number of MachineInstr`s in MachineFunction
int64_t MICount = 0;