author:    Dimitry Andric <dim@FreeBSD.org> 2022-07-04 19:20:19 +0000
committer: Dimitry Andric <dim@FreeBSD.org> 2023-02-08 19:02:26 +0000
commit:    81ad626541db97eb356e2c1d4a20eb2a26a766ab (patch)
tree:      311b6a8987c32b1e1dcbab65c54cfac3fdb56175 /contrib/llvm-project/llvm/lib/CodeGen
parent:    5fff09660e06a66bed6482da9c70df328e16bbb6 (diff)
parent:    145449b1e420787bb99721a429341fa6be3adfb6 (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen')
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp | 19
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp | 15
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 460
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 12
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 11
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 123
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h | 70
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp | 11
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 12
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 23
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 60
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 19
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 11
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp | 10
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 28
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp | 7
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp | 6
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 12
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.h | 5
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp | 19
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp | 58
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp | 332
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp | 181
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 144
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp | 8
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/CFIFixup.cpp | 225
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp | 5
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp | 16
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp | 34
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp | 192
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp | 33
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/DFAPacketizer.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/DetectDeadLanes.cpp | 20
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/EHContGuardCatchret.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp | 7
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp | 14
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp | 10
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp | 27
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/FEntryInserter.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/FaultMaps.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/FinalizeISel.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp | 12
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp | 5
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp | 5
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp | 45
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 10
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp | 8
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 313
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp | 41
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 51
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp | 10
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp | 5
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp | 11
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp | 7
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp | 21
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp | 6
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 150
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp | 10
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp | 7
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 56
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp | 8
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp | 154
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp | 14
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp | 6
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp | 14
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.h | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp | 22
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp | 233
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp | 38
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp | 321
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h | 85
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp | 10
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h | 11
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp | 22
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp | 29
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp | 19
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp | 11
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp | 15
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp | 14
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveRangeCalc.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp | 17
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LiveStacks.cpp | 5
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp | 19
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp | 10
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MIRFSDiscriminator.cpp | 7
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MIRNamerPass.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp | 74
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 30
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp | 32
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp | 8
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp | 119
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp | 27
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp | 15
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp | 6
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineCheckDebugify.cpp | 18
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp | 426
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineCycleAnalysis.cpp | 110
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp | 111
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp | 9
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp | 49
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp | 5
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp | 20
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp | 5
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp | 20
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp | 218
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp | 53
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp | 133
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp | 16
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineSSAContext.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp | 15
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp | 298
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp | 56
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineStripDebug.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp | 86
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp | 88
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/OptimizePHIs.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp | 16
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp | 12
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp | 8
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ProcessImplicitDefs.cpp | 11
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp | 184
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/PseudoProbeInserter.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/PseudoSourceValue.cpp | 46
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp | 16
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp | 6
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp | 14
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp | 9
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h | 10
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp | 33
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp | 38
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h | 48
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp | 36
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp | 564
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h | 187
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp | 22
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.h | 19
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoCollector.cpp | 5
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegisterBank.cpp (renamed from contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp) | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegisterBankInfo.cpp (renamed from contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp) | 7
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp | 19
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RegisterUsageInfo.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp | 5
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp | 57
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 10
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp | 5
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp | 989
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 1896
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 36
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 28
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 17
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 262
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 24
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 388
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 92
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 13
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 46
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 936
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp | 14
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 39
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 19
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 848
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 496
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 26
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 12
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 26
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 28
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 59
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 888
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp | 89
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h | 23
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp | 10
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp | 23
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp | 9
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp | 14
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp | 209
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 91
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp | 80
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TargetSchedule.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp | 72
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp | 137
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp | 14
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp | 10
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp | 15
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp | 12
-rw-r--r-- contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp | 6
251 files changed, 10583 insertions, 5136 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
index cdf5586766da..f5dbaccfcad5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp
@@ -21,12 +21,9 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Transforms/Utils/GlobalStatus.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
index 03e63321e3c4..1940f46232d3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
@@ -38,8 +38,19 @@ void AIXException::emitExceptionInfoTable(const MCSymbol *LSDA,
// unsigned long personality; /* Pointer to the personality routine */
// }
- Asm->OutStreamer->SwitchSection(
- Asm->getObjFileLowering().getCompactUnwindSection());
+ auto *EHInfo =
+ cast<MCSectionXCOFF>(Asm->getObjFileLowering().getCompactUnwindSection());
+ if (Asm->TM.getFunctionSections()) {
+ // If option -ffunction-sections is on, append the function name to the
+ // name of EH Info Table csect so that each function has its own EH Info
+ // Table csect. This helps the linker to garbage-collect EH info of unused
+ // functions.
+ SmallString<128> NameStr = EHInfo->getName();
+ raw_svector_ostream(NameStr) << '.' << Asm->MF->getFunction().getName();
+ EHInfo = Asm->OutContext.getXCOFFSection(NameStr, EHInfo->getKind(),
+ EHInfo->getCsectProp());
+ }
+ Asm->OutStreamer->switchSection(EHInfo);
MCSymbol *EHInfoLabel =
TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(Asm->MF);
Asm->OutStreamer->emitLabel(EHInfoLabel);
@@ -74,8 +85,8 @@ void AIXException::endFunction(const MachineFunction *MF) {
const Function &F = MF->getFunction();
assert(F.hasPersonalityFn() &&
"Landingpads are presented, but no personality routine is found.");
- const GlobalValue *Per =
- dyn_cast<GlobalValue>(F.getPersonalityFn()->stripPointerCasts());
+ const auto *Per =
+ cast<GlobalValue>(F.getPersonalityFn()->stripPointerCasts());
const MCSymbol *PerSym = Asm->TM.getSymbol(Per);
emitExceptionInfoTable(LSDALabel, PerSym);
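
[Editor's note on the hunk above: with -ffunction-sections, the patch derives one EH-info csect per function by appending '.' plus the function name to the common csect name, so the AIX linker can garbage-collect the EH info of any function it drops. A minimal sketch of just the name construction; this helper is illustrative, not part of the patch, which does the same thing inline:]

    #include "llvm/ADT/SmallString.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/raw_ostream.h"

    // Build "<base csect name>.<function name>": one uniquely named csect per
    // function, so unused ones can be dropped at link time.
    static llvm::SmallString<128> perFunctionEHInfoName(llvm::StringRef Base,
                                                        llvm::StringRef Fn) {
      llvm::SmallString<128> Name(Base);
      llvm::raw_svector_ostream(Name) << '.' << Fn;
      return Name;
    }
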
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
index 223840c21d8b..e04a29fbb42b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -14,21 +14,14 @@
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Mangler.h"
-#include "llvm/IR/Module.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/Target/TargetOptions.h"
using namespace llvm;
ARMException::ARMException(AsmPrinter *A) : DwarfCFIExceptionBase(A) {}
-ARMException::~ARMException() {}
+ARMException::~ARMException() = default;
ARMTargetStreamer &ARMException::getTargetStreamer() {
MCTargetStreamer &TS = *Asm->OutStreamer->getTargetStreamer();
@@ -101,7 +94,7 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding,
// Emit the Catch TypeInfos.
if (VerboseAsm && !TypeInfos.empty()) {
Asm->OutStreamer->AddComment(">> Catch TypeInfos <<");
- Asm->OutStreamer->AddBlankLine();
+ Asm->OutStreamer->addBlankLine();
Entry = TypeInfos.size();
}
@@ -116,7 +109,7 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding,
// Emit the Exception Specifications.
if (VerboseAsm && !FilterIds.empty()) {
Asm->OutStreamer->AddComment(">> Filter TypeInfos <<");
- Asm->OutStreamer->AddBlankLine();
+ Asm->OutStreamer->addBlankLine();
Entry = 0;
}
for (std::vector<unsigned>::const_iterator
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
index 65c45f73e965..b10d79f4b5a6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -18,7 +18,6 @@
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DIE.h"
-#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/raw_ostream.h"
@@ -563,7 +562,7 @@ void llvm::emitDWARF5AccelTable(
if (CompUnits.empty())
return;
- Asm->OutStreamer->SwitchSection(
+ Asm->OutStreamer->switchSection(
Asm->getObjFileLowering().getDwarfDebugNamesSection());
Contents.finalize(Asm, "names");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
index 21da9d50efba..32d8dc793510 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
@@ -17,7 +17,7 @@
using namespace llvm;
unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) {
- HasBeenUsed = true;
+ resetUsedFlag(true);
auto IterBool =
Pool.insert(std::make_pair(Sym, AddressPoolEntry(Pool.size(), TLS)));
return IterBool.first->second.Number;
@@ -44,7 +44,7 @@ void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) {
return;
// Start the dwarf addr section.
- Asm.OutStreamer->SwitchSection(AddrSection);
+ Asm.OutStreamer->switchSection(AddrSection);
MCSymbol *EndLabel = nullptr;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 3e8e190eecc3..4a31bf85446b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -27,6 +27,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/ConstantFolding.h"
@@ -48,7 +49,6 @@
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineOperand.h"
@@ -82,33 +82,26 @@
#include "llvm/IR/PseudoProbe.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDirectives.h"
-#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
-#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/SectionKind.h"
-#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
-#include "llvm/Remarks/Remark.h"
-#include "llvm/Remarks/RemarkFormat.h"
#include "llvm/Remarks/RemarkStreamer.h"
-#include "llvm/Remarks/RemarkStringTable.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
@@ -125,7 +118,6 @@
#include <cinttypes>
#include <cstdint>
#include <iterator>
-#include <limits>
#include <memory>
#include <string>
#include <utility>
@@ -135,11 +127,6 @@ using namespace llvm;
#define DEBUG_TYPE "asm-printer"
-// FIXME: this option currently only applies to DWARF, and not CodeView, tables
-static cl::opt<bool>
- DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden,
- cl::desc("Disable debug info printing"));
-
const char DWARFGroupName[] = "dwarf";
const char DWARFGroupDescription[] = "DWARF Emission";
const char DbgTimerName[] = "emit";
@@ -167,6 +154,178 @@ static gcp_map_type &getGCMap(void *&P) {
return *(gcp_map_type*)P;
}
+namespace {
+class AddrLabelMapCallbackPtr final : CallbackVH {
+ AddrLabelMap *Map = nullptr;
+
+public:
+ AddrLabelMapCallbackPtr() = default;
+ AddrLabelMapCallbackPtr(Value *V) : CallbackVH(V) {}
+
+ void setPtr(BasicBlock *BB) {
+ ValueHandleBase::operator=(BB);
+ }
+
+ void setMap(AddrLabelMap *map) { Map = map; }
+
+ void deleted() override;
+ void allUsesReplacedWith(Value *V2) override;
+};
+} // namespace
+
+class llvm::AddrLabelMap {
+ MCContext &Context;
+ struct AddrLabelSymEntry {
+ /// The symbols for the label.
+ TinyPtrVector<MCSymbol *> Symbols;
+
+ Function *Fn; // The containing function of the BasicBlock.
+ unsigned Index; // The index in BBCallbacks for the BasicBlock.
+ };
+
+ DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry> AddrLabelSymbols;
+
+ /// Callbacks for the BasicBlock's that we have entries for. We use this so
+ /// we get notified if a block is deleted or RAUWd.
+ std::vector<AddrLabelMapCallbackPtr> BBCallbacks;
+
+ /// This is a per-function list of symbols whose corresponding BasicBlock got
+ /// deleted. These symbols need to be emitted at some point in the file, so
+ /// AsmPrinter emits them after the function body.
+ DenseMap<AssertingVH<Function>, std::vector<MCSymbol *>>
+ DeletedAddrLabelsNeedingEmission;
+
+public:
+ AddrLabelMap(MCContext &context) : Context(context) {}
+
+ ~AddrLabelMap() {
+ assert(DeletedAddrLabelsNeedingEmission.empty() &&
+ "Some labels for deleted blocks never got emitted");
+ }
+
+ ArrayRef<MCSymbol *> getAddrLabelSymbolToEmit(BasicBlock *BB);
+
+ void takeDeletedSymbolsForFunction(Function *F,
+ std::vector<MCSymbol *> &Result);
+
+ void UpdateForDeletedBlock(BasicBlock *BB);
+ void UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New);
+};
+
+ArrayRef<MCSymbol *> AddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
+ assert(BB->hasAddressTaken() &&
+ "Shouldn't get label for block without address taken");
+ AddrLabelSymEntry &Entry = AddrLabelSymbols[BB];
+
+ // If we already had an entry for this block, just return it.
+ if (!Entry.Symbols.empty()) {
+ assert(BB->getParent() == Entry.Fn && "Parent changed");
+ return Entry.Symbols;
+ }
+
+ // Otherwise, this is a new entry, create a new symbol for it and add an
+ // entry to BBCallbacks so we can be notified if the BB is deleted or RAUWd.
+ BBCallbacks.emplace_back(BB);
+ BBCallbacks.back().setMap(this);
+ Entry.Index = BBCallbacks.size() - 1;
+ Entry.Fn = BB->getParent();
+ MCSymbol *Sym = BB->hasAddressTaken() ? Context.createNamedTempSymbol()
+ : Context.createTempSymbol();
+ Entry.Symbols.push_back(Sym);
+ return Entry.Symbols;
+}
+
+/// If we have any deleted symbols for F, return them.
+void AddrLabelMap::takeDeletedSymbolsForFunction(
+ Function *F, std::vector<MCSymbol *> &Result) {
+ DenseMap<AssertingVH<Function>, std::vector<MCSymbol *>>::iterator I =
+ DeletedAddrLabelsNeedingEmission.find(F);
+
+ // If there are no entries for the function, just return.
+ if (I == DeletedAddrLabelsNeedingEmission.end())
+ return;
+
+ // Otherwise, take the list.
+ std::swap(Result, I->second);
+ DeletedAddrLabelsNeedingEmission.erase(I);
+}
+
+//===- Address of Block Management ----------------------------------------===//
+
+ArrayRef<MCSymbol *>
+AsmPrinter::getAddrLabelSymbolToEmit(const BasicBlock *BB) {
+ // Lazily create AddrLabelSymbols.
+ if (!AddrLabelSymbols)
+ AddrLabelSymbols = std::make_unique<AddrLabelMap>(OutContext);
+ return AddrLabelSymbols->getAddrLabelSymbolToEmit(
+ const_cast<BasicBlock *>(BB));
+}
+
+void AsmPrinter::takeDeletedSymbolsForFunction(
+ const Function *F, std::vector<MCSymbol *> &Result) {
+ // If no blocks have had their addresses taken, we're done.
+ if (!AddrLabelSymbols)
+ return;
+ return AddrLabelSymbols->takeDeletedSymbolsForFunction(
+ const_cast<Function *>(F), Result);
+}
+
+void AddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) {
+ // If the block got deleted, there is no need for the symbol. If the symbol
+ // was already emitted, we can just forget about it, otherwise we need to
+ // queue it up for later emission when the function is output.
+ AddrLabelSymEntry Entry = std::move(AddrLabelSymbols[BB]);
+ AddrLabelSymbols.erase(BB);
+ assert(!Entry.Symbols.empty() && "Didn't have a symbol, why a callback?");
+ BBCallbacks[Entry.Index] = nullptr; // Clear the callback.
+
+#if !LLVM_MEMORY_SANITIZER_BUILD
+ // BasicBlock is destroyed already, so this access is UB detectable by msan.
+ assert((BB->getParent() == nullptr || BB->getParent() == Entry.Fn) &&
+ "Block/parent mismatch");
+#endif
+
+ for (MCSymbol *Sym : Entry.Symbols) {
+ if (Sym->isDefined())
+ return;
+
+ // If the block is not yet defined, we need to emit it at the end of the
+ // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list
+ // for the containing Function. Since the block is being deleted, its
+ // parent may already be removed, we have to get the function from 'Entry'.
+ DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym);
+ }
+}
+
+void AddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) {
+ // Get the entry for the RAUW'd block and remove it from our map.
+ AddrLabelSymEntry OldEntry = std::move(AddrLabelSymbols[Old]);
+ AddrLabelSymbols.erase(Old);
+ assert(!OldEntry.Symbols.empty() && "Didn't have a symbol, why a callback?");
+
+ AddrLabelSymEntry &NewEntry = AddrLabelSymbols[New];
+
+ // If New is not address taken, just move our symbol over to it.
+ if (NewEntry.Symbols.empty()) {
+ BBCallbacks[OldEntry.Index].setPtr(New); // Update the callback.
+ NewEntry = std::move(OldEntry); // Set New's entry.
+ return;
+ }
+
+ BBCallbacks[OldEntry.Index] = nullptr; // Update the callback.
+
+ // Otherwise, we need to add the old symbols to the new block's set.
+ llvm::append_range(NewEntry.Symbols, OldEntry.Symbols);
+}
+
+void AddrLabelMapCallbackPtr::deleted() {
+ Map->UpdateForDeletedBlock(cast<BasicBlock>(getValPtr()));
+}
+
+void AddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
+ Map->UpdateForRAUWBlock(cast<BasicBlock>(getValPtr()), cast<BasicBlock>(V2));
+}
+
/// getGVAlignment - Return the alignment to use for the specified global
/// value. This rounds up to the preferred alignment if possible and legal.
Align AsmPrinter::getGVAlignment(const GlobalObject *GV, const DataLayout &DL,
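
[Editor's note: AddrLabelMap, moved into AsmPrinter by this change, tracks blocks via llvm::CallbackVH from llvm/IR/ValueHandle.h, a value handle whose virtual deleted() and allUsesReplacedWith() hooks fire when the watched Value is destroyed or RAUW'd. A self-contained sketch of the pattern, independent of the patch:]

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/ValueHandle.h"
    #include "llvm/Support/Casting.h"
    using namespace llvm;

    // Watches one BasicBlock and reacts when it is deleted or replaced --
    // the same pattern AddrLabelMapCallbackPtr uses in the hunk above.
    class BlockWatcher final : public CallbackVH {
      void deleted() override {
        // The watched block was destroyed: drop any state keyed on it.
        // (AddrLabelMap instead queues the block's symbols for emission.)
      }
      void allUsesReplacedWith(Value *New) override {
        // The watched block was RAUW'd: re-key state onto the replacement.
        BasicBlock *Replacement = cast<BasicBlock>(New);
        (void)Replacement;
      }

    public:
      BlockWatcher(BasicBlock *BB) : CallbackVH(BB) {}
    };
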
@@ -271,6 +430,10 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
bool AsmPrinter::doInitialization(Module &M) {
auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
MMI = MMIWP ? &MMIWP->getMMI() : nullptr;
+ HasSplitStack = false;
+ HasNoSplitStack = false;
+
+ AddrLabelSymbols = nullptr;
// Initialize TargetLoweringObjectFile.
const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
@@ -281,9 +444,6 @@ bool AsmPrinter::doInitialization(Module &M) {
OutStreamer->initSections(false, *TM.getMCSubtargetInfo());
- if (DisableDebugInfoPrinting)
- MMI->setDebugInfoAvailability(false);
-
// Emit the version-min deployment target directive if needed.
//
// FIXME: If we end up with a collection of these sorts of Darwin-specific
@@ -335,11 +495,11 @@ bool AsmPrinter::doInitialization(Module &M) {
// Emit module-level inline asm if it exists.
if (!M.getModuleInlineAsm().empty()) {
OutStreamer->AddComment("Start of file scope inline assembly");
- OutStreamer->AddBlankLine();
+ OutStreamer->addBlankLine();
emitInlineAsm(M.getModuleInlineAsm() + "\n", *TM.getMCSubtargetInfo(),
TM.Options.MCOptions);
OutStreamer->AddComment("End of file scope inline assembly");
- OutStreamer->AddBlankLine();
+ OutStreamer->addBlankLine();
}
if (MAI->doesSupportDebugInformation()) {
@@ -351,7 +511,7 @@ bool AsmPrinter::doInitialization(Module &M) {
CodeViewLineTablesGroupDescription);
}
if (!EmitCodeView || M.getDwarfVersion()) {
- if (!DisableDebugInfoPrinting) {
+ if (MMI->hasDebugInfo()) {
DD = new DwarfDebug(this);
Handlers.emplace_back(std::unique_ptr<DwarfDebug>(DD), DbgTimerName,
DbgTimerDescription, DWARFGroupName,
@@ -536,9 +696,9 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
if (isVerbose()) {
// When printing the control variable __emutls_v.*,
// we don't need to print the original TLS variable name.
- GV->printAsOperand(OutStreamer->GetCommentOS(),
- /*PrintType=*/false, GV->getParent());
- OutStreamer->GetCommentOS() << '\n';
+ GV->printAsOperand(OutStreamer->getCommentOS(),
+ /*PrintType=*/false, GV->getParent());
+ OutStreamer->getCommentOS() << '\n';
}
}
@@ -652,7 +812,7 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
TheSection = getObjFileLowering().getTLSBSSSection();
OutStreamer->emitTBSSSymbol(TheSection, MangSym, Size, Alignment.value());
} else if (GVKind.isThreadData()) {
- OutStreamer->SwitchSection(TheSection);
+ OutStreamer->switchSection(TheSection);
emitAlignment(Alignment, GV);
OutStreamer->emitLabel(MangSym);
@@ -661,12 +821,12 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
GV->getInitializer());
}
- OutStreamer->AddBlankLine();
+ OutStreamer->addBlankLine();
// Emit the variable struct for the runtime.
MCSection *TLVSect = getObjFileLowering().getTLSExtraDataSection();
- OutStreamer->SwitchSection(TLVSect);
+ OutStreamer->switchSection(TLVSect);
// Emit the linkage here.
emitLinkage(GV, GVSym);
OutStreamer->emitLabel(GVSym);
@@ -681,13 +841,13 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
OutStreamer->emitIntValue(0, PtrSize);
OutStreamer->emitSymbolValue(MangSym, PtrSize);
- OutStreamer->AddBlankLine();
+ OutStreamer->addBlankLine();
return;
}
MCSymbol *EmittedInitSym = GVSym;
- OutStreamer->SwitchSection(TheSection);
+ OutStreamer->switchSection(TheSection);
emitLinkage(GV, EmittedInitSym);
emitAlignment(Alignment, GV);
@@ -704,7 +864,7 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
OutStreamer->emitELFSize(EmittedInitSym,
MCConstantExpr::create(Size, OutContext));
- OutStreamer->AddBlankLine();
+ OutStreamer->addBlankLine();
}
/// Emit the directive and value for debug thread local expression
@@ -723,7 +883,7 @@ void AsmPrinter::emitFunctionHeader() {
const Function &F = MF->getFunction();
if (isVerbose())
- OutStreamer->GetCommentOS()
+ OutStreamer->getCommentOS()
<< "-- Begin function "
<< GlobalValue::dropLLVMManglingEscape(F.getName()) << '\n';
@@ -737,7 +897,7 @@ void AsmPrinter::emitFunctionHeader() {
MF->setSection(getObjFileLowering().getUniqueSectionForFunction(F, TM));
else
MF->setSection(getObjFileLowering().SectionForGlobal(&F, TM));
- OutStreamer->SwitchSection(MF->getSection());
+ OutStreamer->switchSection(MF->getSection());
if (!MAI->hasVisibilityOnlyWithLinkage())
emitVisibility(CurrentFnSym, F.getVisibility());
@@ -756,10 +916,10 @@ void AsmPrinter::emitFunctionHeader() {
OutStreamer->emitSymbolAttribute(CurrentFnSym, MCSA_Cold);
if (isVerbose()) {
- F.printAsOperand(OutStreamer->GetCommentOS(),
- /*PrintType=*/false, F.getParent());
+ F.printAsOperand(OutStreamer->getCommentOS(),
+ /*PrintType=*/false, F.getParent());
emitFunctionHeaderComment();
- OutStreamer->GetCommentOS() << '\n';
+ OutStreamer->getCommentOS() << '\n';
}
// Emit the prefix data.
@@ -817,7 +977,7 @@ void AsmPrinter::emitFunctionHeader() {
// references to the dangling symbols. Emit them at the start of the function
// so that we don't get references to undefined symbols.
std::vector<MCSymbol*> DeadBlockSyms;
- MMI->takeDeletedSymbolsForFunction(&F, DeadBlockSyms);
+ takeDeletedSymbolsForFunction(&F, DeadBlockSyms);
for (MCSymbol *DeadBlockSym : DeadBlockSyms) {
OutStreamer->AddComment("Address taken block that was later removed");
OutStreamer->emitLabel(DeadBlockSym);
@@ -844,6 +1004,24 @@ void AsmPrinter::emitFunctionHeader() {
// Emit the prologue data.
if (F.hasPrologueData())
emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrologueData());
+
+ // Emit the function prologue data for the indirect call sanitizer.
+ if (const MDNode *MD = F.getMetadata(LLVMContext::MD_func_sanitize)) {
+ assert(TM.getTargetTriple().getArch() == Triple::x86 ||
+ TM.getTargetTriple().getArch() == Triple::x86_64);
+ assert(MD->getNumOperands() == 2);
+
+ auto *PrologueSig = mdconst::extract<Constant>(MD->getOperand(0));
+ auto *FTRTTIProxy = mdconst::extract<Constant>(MD->getOperand(1));
+ assert(PrologueSig && FTRTTIProxy);
+ emitGlobalConstant(F.getParent()->getDataLayout(), PrologueSig);
+
+ const MCExpr *Proxy = lowerConstant(FTRTTIProxy);
+ const MCExpr *FnExp = MCSymbolRefExpr::create(CurrentFnSym, OutContext);
+ const MCExpr *PCRel = MCBinaryExpr::createSub(Proxy, FnExp, OutContext);
+ // Use 32 bit since only small code model is supported.
+ OutStreamer->emitValue(PCRel, 4u);
+ }
}
/// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
@@ -912,7 +1090,7 @@ void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
<< printReg(RegNo, MF->getSubtarget().getRegisterInfo());
OutStreamer->AddComment(OS.str());
- OutStreamer->AddBlankLine();
+ OutStreamer->addBlankLine();
}
static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
@@ -925,7 +1103,7 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
<< printReg(Op.getReg(), AP.MF->getSubtarget().getRegisterInfo());
}
AP.OutStreamer->AddComment(OS.str());
- AP.OutStreamer->AddBlankLine();
+ AP.OutStreamer->addBlankLine();
}
/// emitDebugValueComment - This method handles the target-independent form
@@ -1147,32 +1325,42 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
const MCSymbol *FunctionSymbol = getFunctionBegin();
- OutStreamer->PushSection();
- OutStreamer->SwitchSection(BBAddrMapSection);
+ OutStreamer->pushSection();
+ OutStreamer->switchSection(BBAddrMapSection);
+ OutStreamer->AddComment("version");
+ OutStreamer->emitInt8(OutStreamer->getContext().getBBAddrMapVersion());
+ OutStreamer->AddComment("feature");
+ OutStreamer->emitInt8(0);
+ OutStreamer->AddComment("function address");
OutStreamer->emitSymbolValue(FunctionSymbol, getPointerSize());
- // Emit the total number of basic blocks in this function.
+ OutStreamer->AddComment("number of basic blocks");
OutStreamer->emitULEB128IntValue(MF.size());
+ const MCSymbol *PrevMBBEndSymbol = FunctionSymbol;
// Emit BB Information for each basic block in the funciton.
for (const MachineBasicBlock &MBB : MF) {
const MCSymbol *MBBSymbol =
MBB.isEntryBlock() ? FunctionSymbol : MBB.getSymbol();
- // Emit the basic block offset.
- emitLabelDifferenceAsULEB128(MBBSymbol, FunctionSymbol);
+ // Emit the basic block offset relative to the end of the previous block.
+ // This is zero unless the block is padded due to alignment.
+ emitLabelDifferenceAsULEB128(MBBSymbol, PrevMBBEndSymbol);
// Emit the basic block size. When BBs have alignments, their size cannot
// always be computed from their offsets.
emitLabelDifferenceAsULEB128(MBB.getEndSymbol(), MBBSymbol);
OutStreamer->emitULEB128IntValue(getBBAddrMapMetadata(MBB));
+ PrevMBBEndSymbol = MBB.getEndSymbol();
}
- OutStreamer->PopSection();
+ OutStreamer->popSection();
}
void AsmPrinter::emitPseudoProbe(const MachineInstr &MI) {
- auto GUID = MI.getOperand(0).getImm();
- auto Index = MI.getOperand(1).getImm();
- auto Type = MI.getOperand(2).getImm();
- auto Attr = MI.getOperand(3).getImm();
- DILocation *DebugLoc = MI.getDebugLoc();
- PP->emitPseudoProbe(GUID, Index, Type, Attr, DebugLoc);
+ if (PP) {
+ auto GUID = MI.getOperand(0).getImm();
+ auto Index = MI.getOperand(1).getImm();
+ auto Type = MI.getOperand(2).getImm();
+ auto Attr = MI.getOperand(3).getImm();
+ DILocation *DebugLoc = MI.getDebugLoc();
+ PP->emitPseudoProbe(GUID, Index, Type, Attr, DebugLoc);
+ }
}
void AsmPrinter::emitStackSizeSection(const MachineFunction &MF) {
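
[Editor's note on the emitBBAddrMapSection hunk above: block offsets are now encoded relative to the end of the previous block rather than to the function start, so for unpadded blocks the ULEB128 offset is the single byte 0x00 no matter how large the function grows. A toy sketch of the arithmetic, with a made-up block layout that is not from the patch:]

    #include "llvm/Support/LEB128.h"
    #include "llvm/Support/raw_ostream.h"
    #include <cstdint>

    // Three blocks; the third is padded to a 16-byte boundary. Encoding
    // start-minus-previous-end emits 0, 0, 4 instead of the ever-growing
    // absolute offsets 0, 24, 48.
    static void emitToyBBEntries(llvm::raw_ostream &OS) {
      const uint64_t Starts[] = {0, 24, 48}; // function-relative start offsets
      const uint64_t Sizes[] = {24, 20, 10}; // block sizes in bytes
      uint64_t PrevEnd = 0; // the function entry acts as block 0's "previous end"
      for (int I = 0; I < 3; ++I) {
        llvm::encodeULEB128(Starts[I] - PrevEnd, OS); // offset from previous end
        llvm::encodeULEB128(Sizes[I], OS);            // block size
        PrevEnd = Starts[I] + Sizes[I];
      }
    }
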
@@ -1189,15 +1377,16 @@ void AsmPrinter::emitStackSizeSection(const MachineFunction &MF) {
if (FrameInfo.hasVarSizedObjects())
return;
- OutStreamer->PushSection();
- OutStreamer->SwitchSection(StackSizeSection);
+ OutStreamer->pushSection();
+ OutStreamer->switchSection(StackSizeSection);
const MCSymbol *FunctionSymbol = getFunctionBegin();
- uint64_t StackSize = FrameInfo.getStackSize();
+ uint64_t StackSize =
+ FrameInfo.getStackSize() + FrameInfo.getUnsafeStackSize();
OutStreamer->emitSymbolValue(FunctionSymbol, TM.getProgramPointerSize());
OutStreamer->emitULEB128IntValue(StackSize);
- OutStreamer->PopSection();
+ OutStreamer->popSection();
}
void AsmPrinter::emitStackUsage(const MachineFunction &MF) {
@@ -1208,7 +1397,8 @@ void AsmPrinter::emitStackUsage(const MachineFunction &MF) {
return;
const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
- uint64_t StackSize = FrameInfo.getStackSize();
+ uint64_t StackSize =
+ FrameInfo.getStackSize() + FrameInfo.getUnsafeStackSize();
if (StackUsageStream == nullptr) {
std::error_code EC;
@@ -1298,7 +1488,7 @@ void AsmPrinter::emitFunctionBody() {
}
if (isVerbose())
- emitComments(MI, OutStreamer->GetCommentOS());
+ emitComments(MI, OutStreamer->getCommentOS());
switch (MI.getOpcode()) {
case TargetOpcode::CFI_INSTRUCTION:
@@ -1460,7 +1650,7 @@ void AsmPrinter::emitFunctionBody() {
}
// Switch to the original section in case basic block sections was used.
- OutStreamer->SwitchSection(MF->getSection());
+ OutStreamer->switchSection(MF->getSection());
const Function &F = MF->getFunction();
for (const auto &BB : F) {
@@ -1527,9 +1717,9 @@ void AsmPrinter::emitFunctionBody() {
emitPatchableFunctionEntries();
if (isVerbose())
- OutStreamer->GetCommentOS() << "-- End function\n";
+ OutStreamer->getCommentOS() << "-- End function\n";
- OutStreamer->AddBlankLine();
+ OutStreamer->addBlankLine();
}
/// Compute the number of Global Variables that uses a Constant.
@@ -1617,10 +1807,7 @@ void AsmPrinter::emitGlobalAlias(Module &M, const GlobalAlias &GA) {
// Treat bitcasts of functions as functions also. This is important at least
// on WebAssembly where object and function addresses can't alias each other.
if (!IsFunction)
- if (auto *CE = dyn_cast<ConstantExpr>(GA.getAliasee()))
- if (CE->getOpcode() == Instruction::BitCast)
- IsFunction =
- CE->getOperand(0)->getType()->getPointerElementType()->isFunctionTy();
+ IsFunction = isa<Function>(GA.getAliasee()->stripPointerCasts());
// AIX's assembly directive `.set` is not usable for aliasing purpose,
// so AIX has to use the extra-label-at-definition strategy. At this
@@ -1650,13 +1837,13 @@ void AsmPrinter::emitGlobalAlias(Module &M, const GlobalAlias &GA) {
if (IsFunction) {
OutStreamer->emitSymbolAttribute(Name, MCSA_ELF_TypeFunction);
if (TM.getTargetTriple().isOSBinFormatCOFF()) {
- OutStreamer->BeginCOFFSymbolDef(Name);
- OutStreamer->EmitCOFFSymbolStorageClass(
+ OutStreamer->beginCOFFSymbolDef(Name);
+ OutStreamer->emitCOFFSymbolStorageClass(
GA.hasLocalLinkage() ? COFF::IMAGE_SYM_CLASS_STATIC
: COFF::IMAGE_SYM_CLASS_EXTERNAL);
- OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+ OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
<< COFF::SCT_COMPLEX_TYPE_SHIFT);
- OutStreamer->EndCOFFSymbolDef();
+ OutStreamer->endCOFFSymbolDef();
}
}
@@ -1734,7 +1921,7 @@ void AsmPrinter::emitRemarksSection(remarks::RemarkStreamer &RS) {
// Switch to the remarks section.
MCSection *RemarksSection =
OutContext.getObjectFileInfo()->getRemarksSection();
- OutStreamer->SwitchSection(RemarksSection);
+ OutStreamer->switchSection(RemarksSection);
OutStreamer->emitBinaryData(OS.str());
}
@@ -1805,7 +1992,7 @@ bool AsmPrinter::doFinalization(Module &M) {
// Output stubs for external and common global variables.
MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
if (!Stubs.empty()) {
- OutStreamer->SwitchSection(TLOF.getDataSection());
+ OutStreamer->switchSection(TLOF.getDataSection());
const DataLayout &DL = M.getDataLayout();
emitAlignment(Align(DL.getPointerSize()));
@@ -1829,7 +2016,7 @@ bool AsmPrinter::doFinalization(Module &M) {
for (const auto &Stub : Stubs) {
SmallString<256> SectionName = StringRef(".rdata$");
SectionName += Stub.first->getName();
- OutStreamer->SwitchSection(OutContext.getCOFFSection(
+ OutStreamer->switchSection(OutContext.getCOFFSection(
SectionName,
COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ |
COFF::IMAGE_SCN_LNK_COMDAT,
@@ -1920,31 +2107,14 @@ bool AsmPrinter::doFinalization(Module &M) {
// Emit bytes for llvm.commandline metadata.
emitModuleCommandLines(M);
- // Emit __morestack address if needed for indirect calls.
- if (MMI->usesMorestackAddr()) {
- Align Alignment(1);
- MCSection *ReadOnlySection = getObjFileLowering().getSectionForConstant(
- getDataLayout(), SectionKind::getReadOnly(),
- /*C=*/nullptr, Alignment);
- OutStreamer->SwitchSection(ReadOnlySection);
-
- MCSymbol *AddrSymbol =
- OutContext.getOrCreateSymbol(StringRef("__morestack_addr"));
- OutStreamer->emitLabel(AddrSymbol);
-
- unsigned PtrSize = MAI->getCodePointerSize();
- OutStreamer->emitSymbolValue(GetExternalSymbolSymbol("__morestack"),
- PtrSize);
- }
-
// Emit .note.GNU-split-stack and .note.GNU-no-split-stack sections if
// split-stack is used.
- if (TM.getTargetTriple().isOSBinFormatELF() && MMI->hasSplitStack()) {
- OutStreamer->SwitchSection(
- OutContext.getELFSection(".note.GNU-split-stack", ELF::SHT_PROGBITS, 0));
- if (MMI->hasNosplitStack())
- OutStreamer->SwitchSection(
- OutContext.getELFSection(".note.GNU-no-split-stack", ELF::SHT_PROGBITS, 0));
+ if (TM.getTargetTriple().isOSBinFormatELF() && HasSplitStack) {
+ OutStreamer->switchSection(OutContext.getELFSection(".note.GNU-split-stack",
+ ELF::SHT_PROGBITS, 0));
+ if (HasNoSplitStack)
+ OutStreamer->switchSection(OutContext.getELFSection(
+ ".note.GNU-no-split-stack", ELF::SHT_PROGBITS, 0));
}
// If we don't have any trampolines, then we don't require stack memory
@@ -1952,7 +2122,7 @@ bool AsmPrinter::doFinalization(Module &M) {
Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline");
if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty())
if (MCSection *S = MAI->getNonexecutableStackSection(OutContext))
- OutStreamer->SwitchSection(S);
+ OutStreamer->switchSection(S);
if (TM.Options.EmitAddrsig) {
// Emit address-significance attributes for all globals.
@@ -1973,7 +2143,7 @@ bool AsmPrinter::doFinalization(Module &M) {
GV.getVisibility() != GlobalValue::DefaultVisibility)
continue;
- OutStreamer->SwitchSection(
+ OutStreamer->switchSection(
OutContext.getELFSection(".llvm_sympart", ELF::SHT_LLVM_SYMPART, 0, 0,
"", false, ++UniqueID, nullptr));
OutStreamer->emitBytes(GV.getPartition());
@@ -1989,8 +2159,9 @@ bool AsmPrinter::doFinalization(Module &M) {
emitEndOfAsmFile(M);
MMI = nullptr;
+ AddrLabelSymbols = nullptr;
- OutStreamer->Finish();
+ OutStreamer->finish();
OutStreamer->reset();
OwnedMLI.reset();
OwnedMDT.reset();
@@ -2009,6 +2180,16 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
this->MF = &MF;
const Function &F = MF.getFunction();
+ // Record that there are split-stack functions, so we will emit a special
+ // section to tell the linker.
+ if (MF.shouldSplitStack()) {
+ HasSplitStack = true;
+
+ if (!MF.getFrameInfo().needsSplitStackProlog())
+ HasNoSplitStack = true;
+ } else
+ HasNoSplitStack = true;
+
// Get the function symbol.
if (!MAI->needsFunctionDescriptors()) {
CurrentFnSym = getSymbol(&MF.getFunction());
@@ -2113,7 +2294,7 @@ void AsmPrinter::emitConstantPool() {
continue;
if (CurSection != CPSections[i].S) {
- OutStreamer->SwitchSection(CPSections[i].S);
+ OutStreamer->switchSection(CPSections[i].S);
emitAlignment(Align(CPSections[i].Alignment));
CurSection = CPSections[i].S;
Offset = 0;
@@ -2156,7 +2337,7 @@ void AsmPrinter::emitJumpTableInfo() {
if (JTInDiffSection) {
// Drop it in the readonly section.
MCSection *ReadOnlySection = TLOF.getSectionForJumpTable(F, TM);
- OutStreamer->SwitchSection(ReadOnlySection);
+ OutStreamer->switchSection(ReadOnlySection);
}
emitAlignment(Align(MJTI->getEntryAlignment(DL)));
@@ -2392,7 +2573,7 @@ void AsmPrinter::emitXXStructorList(const DataLayout &DL, const Constant *List,
MCSection *OutputSection =
(IsCtor ? Obj.getStaticCtorSection(S.Priority, KeySym)
: Obj.getStaticDtorSection(S.Priority, KeySym));
- OutStreamer->SwitchSection(OutputSection);
+ OutStreamer->switchSection(OutputSection);
if (OutStreamer->getCurrentSection() != OutStreamer->getPreviousSection())
emitAlignment(Align);
emitXXStructor(DL, S.Func);
@@ -2423,8 +2604,8 @@ void AsmPrinter::emitModuleCommandLines(Module &M) {
if (!NMD || !NMD->getNumOperands())
return;
- OutStreamer->PushSection();
- OutStreamer->SwitchSection(CommandLine);
+ OutStreamer->pushSection();
+ OutStreamer->switchSection(CommandLine);
OutStreamer->emitZeros(1);
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
const MDNode *N = NMD->getOperand(i);
@@ -2434,7 +2615,7 @@ void AsmPrinter::emitModuleCommandLines(Module &M) {
OutStreamer->emitBytes(S->getString());
OutStreamer->emitZeros(1);
}
- OutStreamer->PopSection();
+ OutStreamer->popSection();
}
//===--------------------------------------------------------------------===//
@@ -2471,7 +2652,7 @@ void AsmPrinter::emitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
unsigned Size,
bool IsSectionRelative) const {
if (MAI->needsDwarfSectionOffsetDirective() && IsSectionRelative) {
- OutStreamer->EmitCOFFSecRel32(Label, Offset);
+ OutStreamer->emitCOFFSecRel32(Label, Offset);
if (Size > 4)
OutStreamer->emitZeros(Size - 4);
return;
@@ -2541,6 +2722,9 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
llvm_unreachable("Unknown constant value to lower!");
}
+ // The constant expression opcodes are limited to those that are necessary
+ // to represent relocations on supported targets. Expressions involving only
+ // constant addresses are constant folded instead.
switch (CE->getOpcode()) {
case Instruction::AddrSpaceCast: {
const Constant *Op = CE->getOperand(0);
@@ -2658,34 +2842,17 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
return RelocExpr;
}
}
+
+ const MCExpr *LHS = lowerConstant(CE->getOperand(0));
+ const MCExpr *RHS = lowerConstant(CE->getOperand(1));
+ return MCBinaryExpr::createSub(LHS, RHS, Ctx);
+ break;
}
- // else fallthrough
- LLVM_FALLTHROUGH;
-
- // The MC library also has a right-shift operator, but it isn't consistently
- // signed or unsigned between different targets.
- case Instruction::Add:
- case Instruction::Mul:
- case Instruction::SDiv:
- case Instruction::SRem:
- case Instruction::Shl:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor: {
+
+ case Instruction::Add: {
const MCExpr *LHS = lowerConstant(CE->getOperand(0));
const MCExpr *RHS = lowerConstant(CE->getOperand(1));
- switch (CE->getOpcode()) {
- default: llvm_unreachable("Unknown binary operator constant cast expr");
- case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
- case Instruction::Sub: return MCBinaryExpr::createSub(LHS, RHS, Ctx);
- case Instruction::Mul: return MCBinaryExpr::createMul(LHS, RHS, Ctx);
- case Instruction::SDiv: return MCBinaryExpr::createDiv(LHS, RHS, Ctx);
- case Instruction::SRem: return MCBinaryExpr::createMod(LHS, RHS, Ctx);
- case Instruction::Shl: return MCBinaryExpr::createShl(LHS, RHS, Ctx);
- case Instruction::And: return MCBinaryExpr::createAnd(LHS, RHS, Ctx);
- case Instruction::Or: return MCBinaryExpr::createOr (LHS, RHS, Ctx);
- case Instruction::Xor: return MCBinaryExpr::createXor(LHS, RHS, Ctx);
- }
+ return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
}
}
}
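
[Editor's note on the lowerConstant hunk above: the fallthrough that mapped Mul/SDiv/SRem/Shl/And/Or/Xor onto MCBinaryExpr is gone because object-file relocations can generally express only symbol-plus-addend and, on targets that support it, symbol differences; any other constant arithmetic must already have been folded. The surviving Sub case produces expressions like the following sketch, with assumed symbols, illustrative only:]

    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCExpr.h"
    using namespace llvm;

    // Lower the relocatable pattern "ptrtoint(@a) - ptrtoint(@b)" to the MC
    // expression SymA - SymB, which the assembler can emit as a relocation
    // (or fold, if both symbols land in the same fragment).
    static const MCExpr *lowerSymbolDifference(MCSymbol *SymA, MCSymbol *SymB,
                                               MCContext &Ctx) {
      const MCExpr *LHS = MCSymbolRefExpr::create(SymA, Ctx);
      const MCExpr *RHS = MCSymbolRefExpr::create(SymB, Ctx);
      return MCBinaryExpr::createSub(LHS, RHS, Ctx);
    }
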
@@ -2719,7 +2886,7 @@ static int isRepeatedByteSequence(const Value *V, const DataLayout &DL) {
assert(Size % 8 == 0);
// Extend the element to take zero padding into account.
- APInt Value = CI->getValue().zextOrSelf(Size);
+ APInt Value = CI->getValue().zext(Size);
if (!Value.isSplat(8))
return -1;
@@ -2768,8 +2935,8 @@ static void emitGlobalConstantDataSequential(const DataLayout &DL,
if (isa<IntegerType>(CDS->getElementType())) {
for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
if (AP.isVerbose())
- AP.OutStreamer->GetCommentOS() << format("0x%" PRIx64 "\n",
- CDS->getElementAsInteger(i));
+ AP.OutStreamer->getCommentOS()
+ << format("0x%" PRIx64 "\n", CDS->getElementAsInteger(i));
AP.OutStreamer->emitIntValue(CDS->getElementAsInteger(i),
ElementByteSize);
}
@@ -2855,8 +3022,8 @@ static void emitGlobalConstantFP(APFloat APF, Type *ET, AsmPrinter &AP) {
if (AP.isVerbose()) {
SmallString<8> StrVal;
APF.toString(StrVal);
- ET->print(AP.OutStreamer->GetCommentOS());
- AP.OutStreamer->GetCommentOS() << ' ' << StrVal << '\n';
+ ET->print(AP.OutStreamer->getCommentOS());
+ AP.OutStreamer->getCommentOS() << ' ' << StrVal << '\n';
}
// Now iterate through the APInt chunks, emitting them in endian-correct
@@ -3061,8 +3228,8 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV,
if (StoreSize <= 8) {
if (AP.isVerbose())
- AP.OutStreamer->GetCommentOS() << format("0x%" PRIx64 "\n",
- CI->getZExtValue());
+ AP.OutStreamer->getCommentOS()
+ << format("0x%" PRIx64 "\n", CI->getZExtValue());
AP.OutStreamer->emitIntValue(CI->getZExtValue(), StoreSize);
} else {
emitGlobalConstantLargeInt(CI, AP);
@@ -3163,11 +3330,12 @@ MCSymbol *AsmPrinter::createTempSymbol(const Twine &Name) const {
}
MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const {
- return MMI->getAddrLabelSymbol(BA->getBasicBlock());
+ return const_cast<AsmPrinter *>(this)->getAddrLabelSymbol(
+ BA->getBasicBlock());
}
MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const {
- return MMI->getAddrLabelSymbol(BB);
+ return const_cast<AsmPrinter *>(this)->getAddrLabelSymbol(BB);
}
/// GetCPISymbol - Return the symbol for the specified constant pool entry.
@@ -3272,7 +3440,7 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
// Otherwise, it is a loop header. Print out information about child and
// parent loops.
- raw_ostream &OS = AP.OutStreamer->GetCommentOS();
+ raw_ostream &OS = AP.OutStreamer->getCommentOS();
PrintParentLoopComment(OS, Loop->getParentLoop(), AP.getFunctionNumber());
@@ -3308,7 +3476,7 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
// entry block is always placed in the function section and is handled
// separately.
if (MBB.isBeginSection() && !MBB.isEntryBlock()) {
- OutStreamer->SwitchSection(
+ OutStreamer->switchSection(
getObjFileLowering().getSectionForMachineBasicBlock(MF->getFunction(),
MBB, TM));
CurrentSectionBeginSym = MBB.getSymbol();
@@ -3326,7 +3494,7 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
// MBBs can have their address taken as part of CodeGen without having
// their corresponding BB's address taken in IR
if (BB && BB->hasAddressTaken())
- for (MCSymbol *Sym : MMI->getAddrLabelSymbolToEmit(BB))
+ for (MCSymbol *Sym : getAddrLabelSymbolToEmit(BB))
OutStreamer->emitLabel(Sym);
}
@@ -3334,9 +3502,9 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
if (isVerbose()) {
if (BB) {
if (BB->hasName()) {
- BB->printAsOperand(OutStreamer->GetCommentOS(),
+ BB->printAsOperand(OutStreamer->getCommentOS(),
/*PrintType=*/false, BB->getModule());
- OutStreamer->GetCommentOS() << '\n';
+ OutStreamer->getCommentOS() << '\n';
}
}
@@ -3563,7 +3731,7 @@ void AsmPrinter::emitXRayTable() {
// range of sleds associated with a function.
auto &Ctx = OutContext;
MCSymbol *SledsStart = OutContext.createTempSymbol("xray_sleds_start", true);
- OutStreamer->SwitchSection(InstMap);
+ OutStreamer->switchSection(InstMap);
OutStreamer->emitLabel(SledsStart);
for (const auto &Sled : Sleds) {
MCSymbol *Dot = Ctx.createTempSymbol();
@@ -3590,11 +3758,11 @@ void AsmPrinter::emitXRayTable() {
// Each entry here will be 2 * word size aligned, as we're writing down two
// pointers. This should work for both 32-bit and 64-bit platforms.
if (FnSledIndex) {
- OutStreamer->SwitchSection(FnSledIndex);
+ OutStreamer->switchSection(FnSledIndex);
OutStreamer->emitCodeAlignment(2 * WordSizeBytes, &getSubtargetInfo());
OutStreamer->emitSymbolValue(SledsStart, WordSizeBytes, false);
OutStreamer->emitSymbolValue(SledsEnd, WordSizeBytes, false);
- OutStreamer->SwitchSection(PrevSection);
+ OutStreamer->switchSection(PrevSection);
}
Sleds.clear();
}
@@ -3639,7 +3807,7 @@ void AsmPrinter::emitPatchableFunctionEntries() {
}
LinkedToSym = cast<MCSymbolELF>(CurrentFnSym);
}
- OutStreamer->SwitchSection(OutContext.getELFSection(
+ OutStreamer->switchSection(OutContext.getELFSection(
"__patchable_function_entries", ELF::SHT_PROGBITS, Flags, 0, GroupName,
F.hasComdat(), MCSection::NonUniqueID, LinkedToSym));
emitAlignment(Align(PointerSize));
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index fc127f4cf9da..719fec06aa33 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -10,7 +10,6 @@
//
//===----------------------------------------------------------------------===//
-#include "ByteStreamer.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AsmPrinter.h"
@@ -19,14 +18,11 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDwarf.h"
-#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
#include <cstdint>
using namespace llvm;
@@ -162,7 +158,7 @@ void AsmPrinter::emitDwarfSymbolReference(const MCSymbol *Label,
if (MAI->needsDwarfSectionOffsetDirective()) {
assert(!isDwarf64() &&
"emitting DWARF64 is not implemented for COFF targets");
- OutStreamer->EmitCOFFSecRel32(Label, /*Offset=*/0);
+ OutStreamer->emitCOFFSecRel32(Label, /*Offset=*/0);
return;
}
@@ -277,6 +273,12 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
case MCCFIInstruction::OpUndefined:
OutStreamer->emitCFIUndefined(Inst.getRegister());
break;
+ case MCCFIInstruction::OpRememberState:
+ OutStreamer->emitCFIRememberState();
+ break;
+ case MCCFIInstruction::OpRestoreState:
+ OutStreamer->emitCFIRestoreState();
+ break;
}
}
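The two new cases round out AsmPrinter's handling of the CFI state-stack pair. A sketch of where such instructions originate, assuming the usual frame-lowering flow (Label stands for whatever MCSymbol the directive attaches to):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/MC/MCDwarf.h"
    using namespace llvm;

    static void pushPopUnwindState(MCSymbol *Label,
                                   SmallVectorImpl<MCCFIInstruction> &CFI) {
      // Printed as ".cfi_remember_state": snapshot the current unwind rules.
      CFI.push_back(MCCFIInstruction::createRememberState(Label));
      // ... instructions that temporarily change the CFA would go here ...
      // Printed as ".cfi_restore_state": pop back to the snapshot.
      CFI.push_back(MCCFIInstruction::createRestoreState(Label));
    }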
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 5d0cadefdbf7..88c82cbc958b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -17,8 +17,8 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -26,9 +26,10 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
@@ -115,7 +116,7 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
report_fatal_error("Inline asm not supported by this streamer because"
" we don't have an asm parser for this target\n");
Parser->setAssemblerDialect(Dialect);
- Parser->setTargetParser(*TAP.get());
+ Parser->setTargetParser(*TAP);
// Enable lexing Masm binary and hex integer literals in intel inline
// assembly.
if (Dialect == InlineAsm::AD_Intel)
@@ -398,9 +399,9 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
if (!RestrRegs.empty()) {
std::string Msg = "inline asm clobber list contains reserved registers: ";
ListSeparator LS;
- for (const Register &RR : RestrRegs) {
+ for (const Register RR : RestrRegs) {
Msg += LS;
- Msg += TRI->getName(RR);
+ Msg += TRI->getRegAsmName(RR);
}
const char *Note =
"Reserved registers on the clobber list may not be "
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 52c74713551c..701c0affdfa6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "CodeViewDebug.h"
-#include "DwarfExpression.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
@@ -29,7 +28,6 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -41,7 +39,6 @@
#include "llvm/DebugInfo/CodeView/EnumTables.h"
#include "llvm/DebugInfo/CodeView/Line.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
-#include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/CodeView/TypeTableCollection.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
@@ -58,11 +55,8 @@
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/BinaryByteStream.h"
-#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
@@ -230,7 +224,7 @@ unsigned CodeViewDebug::maybeRecordFile(const DIFile *F) {
break;
}
}
- bool Success = OS.EmitCVFileDirective(NextId, FullPath, ChecksumAsBytes,
+ bool Success = OS.emitCVFileDirective(NextId, FullPath, ChecksumAsBytes,
static_cast<unsigned>(CSKind));
(void)Success;
assert(Success && ".cv_file directive failed");
@@ -251,7 +245,7 @@ CodeViewDebug::getInlineSite(const DILocation *InlinedAt,
.SiteFuncId;
Site->SiteFuncId = NextFuncId++;
- OS.EmitCVInlineSiteIdDirective(
+ OS.emitCVInlineSiteIdDirective(
Site->SiteFuncId, ParentFuncId, maybeRecordFile(InlinedAt->getFile()),
InlinedAt->getLine(), InlinedAt->getColumn(), SMLoc());
Site->Inlinee = Inlinee;
@@ -515,7 +509,7 @@ void CodeViewDebug::maybeRecordLocation(const DebugLoc &DL,
if (!DL || DL == PrevInstLoc)
return;
- const DIScope *Scope = DL.get()->getScope();
+ const DIScope *Scope = DL->getScope();
if (!Scope)
return;
@@ -614,18 +608,16 @@ static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
void CodeViewDebug::beginModule(Module *M) {
  // If the module doesn't have named metadata anchors or the COFF debug
  // section is not available, skip any debug-info-related work.
- NamedMDNode *CUs = M->getNamedMetadata("llvm.dbg.cu");
- if (!CUs || !Asm->getObjFileLowering().getCOFFDebugSymbolsSection()) {
+ if (!MMI->hasDebugInfo() ||
+ !Asm->getObjFileLowering().getCOFFDebugSymbolsSection()) {
Asm = nullptr;
return;
}
- // Tell MMI that we have and need debug info.
- MMI->setDebugInfoAvailability(true);
TheCPU = mapArchToCVCPUType(Triple(M->getTargetTriple()).getArch());
// Get the current source language.
- const MDNode *Node = *CUs->operands().begin();
+ const MDNode *Node = *M->debug_compile_units_begin();
const auto *CU = cast<DICompileUnit>(Node);
CurrentSourceLanguage = MapDWLangToCVLang(CU->getSourceLanguage());
@@ -727,7 +719,7 @@ void CodeViewDebug::emitTypeInformation() {
return;
// Start the .debug$T or .debug$P section with 0x4.
- OS.SwitchSection(Asm->getObjFileLowering().getCOFFDebugTypesSection());
+ OS.switchSection(Asm->getObjFileLowering().getCOFFDebugTypesSection());
emitCodeViewMagicVersion();
TypeTableCollection Table(TypeTable.records());
@@ -760,7 +752,7 @@ void CodeViewDebug::emitTypeGlobalHashes() {
// Start the .debug$H section with the version and hash algorithm, currently
// hardcoded to version 0, SHA1.
- OS.SwitchSection(Asm->getObjFileLowering().getCOFFGlobalTypeHashesSection());
+ OS.switchSection(Asm->getObjFileLowering().getCOFFGlobalTypeHashesSection());
OS.emitValueToAlignment(4);
OS.AddComment("Magic");
@@ -826,6 +818,8 @@ static Version parseVersion(StringRef Name) {
if (isdigit(C)) {
V.Part[N] *= 10;
V.Part[N] += C - '0';
+ V.Part[N] =
+ std::min<int>(V.Part[N], std::numeric_limits<uint16_t>::max());
} else if (C == '.') {
++N;
if (N >= 4)
@@ -867,7 +861,6 @@ void CodeViewDebug::emitCompilerInformation() {
Version FrontVer = parseVersion(CompilerVersion);
OS.AddComment("Frontend version");
for (int N : FrontVer.Part) {
- N = std::min<int>(N, std::numeric_limits<uint16_t>::max());
OS.emitInt16(N);
}
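Clamping as each digit is consumed, rather than once after parsing as the deleted line did, also keeps the int accumulator from overflowing on a very long digit run. A standalone sketch of the pattern (parsePart is an illustrative helper, not in the patch):

    #include <algorithm>
    #include <cstdint>
    #include <string>

    static uint16_t parsePart(const std::string &Digits) {
      int Part = 0;
      for (char C : Digits) {
        Part = Part * 10 + (C - '0');
        // Clamp on every step: Part never exceeds 65535, so the next
        // "Part * 10 + 9" stays far below INT_MAX.
        Part = std::min(Part, 65535);
      }
      return static_cast<uint16_t>(Part);
    }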
@@ -985,11 +978,11 @@ void CodeViewDebug::emitInlineeLinesSubsection() {
assert(TypeIndices.count({SP, nullptr}));
TypeIndex InlineeIdx = TypeIndices[{SP, nullptr}];
- OS.AddBlankLine();
+ OS.addBlankLine();
unsigned FileId = maybeRecordFile(SP->getFile());
OS.AddComment("Inlined function " + SP->getName() + " starts at " +
SP->getFilename() + Twine(':') + Twine(SP->getLine()));
- OS.AddBlankLine();
+ OS.addBlankLine();
OS.AddComment("Type index of inlined function");
OS.emitInt32(InlineeIdx.getIndex());
OS.AddComment("Offset into filechecksum table");
@@ -1051,7 +1044,7 @@ void CodeViewDebug::switchToDebugSectionForSymbol(const MCSymbol *GVSym) {
Asm->getObjFileLowering().getCOFFDebugSymbolsSection());
DebugSec = OS.getContext().getAssociativeCOFFSection(DebugSec, KeySym);
- OS.SwitchSection(DebugSec);
+ OS.switchSection(DebugSec);
// Emit the magic version number if this is the first time we've switched to
// this section.
@@ -1080,9 +1073,9 @@ void CodeViewDebug::emitDebugInfoForThunk(const Function *GV,
OS.AddComment("PtrNext");
OS.emitInt32(0);
OS.AddComment("Thunk section relative address");
- OS.EmitCOFFSecRel32(Fn, /*Offset=*/0);
+ OS.emitCOFFSecRel32(Fn, /*Offset=*/0);
OS.AddComment("Thunk section index");
- OS.EmitCOFFSectionIndex(Fn);
+ OS.emitCOFFSectionIndex(Fn);
OS.AddComment("Code size");
OS.emitAbsoluteSymbolDiff(FI.End, Fn, 2);
OS.AddComment("Ordinal");
@@ -1132,7 +1125,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
// Emit FPO data, but only on 32-bit x86. No other platforms use it.
if (Triple(MMI->getModule()->getTargetTriple()).getArch() == Triple::x86)
- OS.EmitCVFPOData(Fn);
+ OS.emitCVFPOData(Fn);
// Emit a symbol subsection, required by VS2012+ to find function boundaries.
OS.AddComment("Symbol subsection for " + Twine(FuncName));
@@ -1160,9 +1153,9 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
OS.AddComment("Function type index");
OS.emitInt32(getFuncIdForSubprogram(GV->getSubprogram()).getIndex());
OS.AddComment("Function section relative address");
- OS.EmitCOFFSecRel32(Fn, /*Offset=*/0);
+ OS.emitCOFFSecRel32(Fn, /*Offset=*/0);
OS.AddComment("Function section index");
- OS.EmitCOFFSectionIndex(Fn);
+ OS.emitCOFFSectionIndex(Fn);
OS.AddComment("Flags");
OS.emitInt8(0);
// Emit the function display name as a null-terminated string.
@@ -1207,9 +1200,9 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
MCSymbol *Label = Annot.first;
MDTuple *Strs = cast<MDTuple>(Annot.second);
MCSymbol *AnnotEnd = beginSymbolRecord(SymbolKind::S_ANNOTATION);
- OS.EmitCOFFSecRel32(Label, /*Offset=*/0);
+ OS.emitCOFFSecRel32(Label, /*Offset=*/0);
// FIXME: Make sure we don't overflow the max record size.
- OS.EmitCOFFSectionIndex(Label);
+ OS.emitCOFFSectionIndex(Label);
OS.emitInt16(Strs->getNumOperands());
for (Metadata *MD : Strs->operands()) {
// MDStrings are null terminated, so we can do EmitBytes and get the
@@ -1227,9 +1220,9 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
const DIType *DITy = std::get<2>(HeapAllocSite);
MCSymbol *HeapAllocEnd = beginSymbolRecord(SymbolKind::S_HEAPALLOCSITE);
OS.AddComment("Call site offset");
- OS.EmitCOFFSecRel32(BeginLabel, /*Offset=*/0);
+ OS.emitCOFFSecRel32(BeginLabel, /*Offset=*/0);
OS.AddComment("Call site section index");
- OS.EmitCOFFSectionIndex(BeginLabel);
+ OS.emitCOFFSectionIndex(BeginLabel);
OS.AddComment("Call instruction length");
OS.emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 2);
OS.AddComment("Type index");
@@ -1249,9 +1242,9 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
OS.emitCVLinetableDirective(FI.FuncId, Fn, FI.End);
}
-CodeViewDebug::LocalVarDefRange
+CodeViewDebug::LocalVarDef
CodeViewDebug::createDefRangeMem(uint16_t CVRegister, int Offset) {
- LocalVarDefRange DR;
+ LocalVarDef DR;
DR.InMemory = -1;
DR.DataOffset = Offset;
assert(DR.DataOffset == Offset && "truncation");
@@ -1303,19 +1296,19 @@ void CodeViewDebug::collectVariableInfoFromMFTable(
"Frame offsets with a scalable component are not supported");
// Calculate the label ranges.
- LocalVarDefRange DefRange =
+ LocalVarDef DefRange =
createDefRangeMem(CVReg, FrameOffset.getFixed() + ExprOffset);
+ LocalVariable Var;
+ Var.DIVar = VI.Var;
+
for (const InsnRange &Range : Scope->getRanges()) {
const MCSymbol *Begin = getLabelBeforeInsn(Range.first);
const MCSymbol *End = getLabelAfterInsn(Range.second);
End = End ? End : Asm->getFunctionEnd();
- DefRange.Ranges.emplace_back(Begin, End);
+ Var.DefRanges[DefRange].emplace_back(Begin, End);
}
- LocalVariable Var;
- Var.DIVar = VI.Var;
- Var.DefRanges.emplace_back(std::move(DefRange));
if (Deref)
Var.UseReferenceType = true;
@@ -1374,24 +1367,18 @@ void CodeViewDebug::calculateRanges(
// We can only handle a register or an offseted load of a register.
if (Location->Register == 0 || Location->LoadChain.size() > 1)
continue;
- {
- LocalVarDefRange DR;
- DR.CVRegister = TRI->getCodeViewRegNum(Location->Register);
- DR.InMemory = !Location->LoadChain.empty();
- DR.DataOffset =
- !Location->LoadChain.empty() ? Location->LoadChain.back() : 0;
- if (Location->FragmentInfo) {
- DR.IsSubfield = true;
- DR.StructOffset = Location->FragmentInfo->OffsetInBits / 8;
- } else {
- DR.IsSubfield = false;
- DR.StructOffset = 0;
- }
- if (Var.DefRanges.empty() ||
- Var.DefRanges.back().isDifferentLocation(DR)) {
- Var.DefRanges.emplace_back(std::move(DR));
- }
+ LocalVarDef DR;
+ DR.CVRegister = TRI->getCodeViewRegNum(Location->Register);
+ DR.InMemory = !Location->LoadChain.empty();
+ DR.DataOffset =
+ !Location->LoadChain.empty() ? Location->LoadChain.back() : 0;
+ if (Location->FragmentInfo) {
+ DR.IsSubfield = true;
+ DR.StructOffset = Location->FragmentInfo->OffsetInBits / 8;
+ } else {
+ DR.IsSubfield = false;
+ DR.StructOffset = 0;
}
// Compute the label range.
@@ -1408,7 +1395,7 @@ void CodeViewDebug::calculateRanges(
// If the last range end is our begin, just extend the last range.
// Otherwise make a new range.
SmallVectorImpl<std::pair<const MCSymbol *, const MCSymbol *>> &R =
- Var.DefRanges.back().Ranges;
+ Var.DefRanges[DR];
if (!R.empty() && R.back().second == Begin)
R.back().second = End;
else
@@ -1525,7 +1512,7 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
// FIXME: Set GuardCfg when it is implemented.
CurFn->FrameProcOpts = FPO;
- OS.EmitCVFuncIdDirective(CurFn->FuncId);
+ OS.emitCVFuncIdDirective(CurFn->FuncId);
// Find the end of the function prolog. First known non-DBG_VALUE and
// non-frame setup location marks the beginning of the function body.
@@ -1825,6 +1812,7 @@ TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) {
break;
case dwarf::DW_ATE_UTF:
switch (ByteSize) {
+ case 1: STK = SimpleTypeKind::Character8; break;
case 2: STK = SimpleTypeKind::Character16; break;
case 4: STK = SimpleTypeKind::Character32; break;
}
@@ -2820,7 +2808,9 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI,
// records and on disk formats are described in SymbolRecords.h. BytePrefix
// should be big enough to hold all forms without memory allocation.
SmallString<20> BytePrefix;
- for (const LocalVarDefRange &DefRange : Var.DefRanges) {
+ for (const auto &Pair : Var.DefRanges) {
+ LocalVarDef DefRange = Pair.first;
+ const auto &Ranges = Pair.second;
BytePrefix.clear();
if (DefRange.InMemory) {
int Offset = DefRange.DataOffset;
@@ -2844,7 +2834,7 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI,
: (EncFP == FI.EncodedLocalFramePtrReg))) {
DefRangeFramePointerRelHeader DRHdr;
DRHdr.Offset = Offset;
- OS.emitCVDefRangeDirective(DefRange.Ranges, DRHdr);
+ OS.emitCVDefRangeDirective(Ranges, DRHdr);
} else {
uint16_t RegRelFlags = 0;
if (DefRange.IsSubfield) {
@@ -2856,7 +2846,7 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI,
DRHdr.Register = Reg;
DRHdr.Flags = RegRelFlags;
DRHdr.BasePointerOffset = Offset;
- OS.emitCVDefRangeDirective(DefRange.Ranges, DRHdr);
+ OS.emitCVDefRangeDirective(Ranges, DRHdr);
}
} else {
assert(DefRange.DataOffset == 0 && "unexpected offset into register");
@@ -2865,12 +2855,12 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI,
DRHdr.Register = DefRange.CVRegister;
DRHdr.MayHaveNoName = 0;
DRHdr.OffsetInParent = DefRange.StructOffset;
- OS.emitCVDefRangeDirective(DefRange.Ranges, DRHdr);
+ OS.emitCVDefRangeDirective(Ranges, DRHdr);
} else {
DefRangeRegisterHeader DRHdr;
DRHdr.Register = DefRange.CVRegister;
DRHdr.MayHaveNoName = 0;
- OS.emitCVDefRangeDirective(DefRange.Ranges, DRHdr);
+ OS.emitCVDefRangeDirective(Ranges, DRHdr);
}
}
}
@@ -2894,9 +2884,9 @@ void CodeViewDebug::emitLexicalBlock(const LexicalBlock &Block,
OS.AddComment("Code size");
OS.emitAbsoluteSymbolDiff(Block.End, Block.Begin, 4); // Code Size
OS.AddComment("Function section relative address");
- OS.EmitCOFFSecRel32(Block.Begin, /*Offset=*/0); // Func Offset
+ OS.emitCOFFSecRel32(Block.Begin, /*Offset=*/0); // Func Offset
OS.AddComment("Function section index");
- OS.EmitCOFFSectionIndex(FI.Begin); // Func Symbol
+ OS.emitCOFFSectionIndex(FI.Begin); // Func Symbol
OS.AddComment("Lexical block name");
emitNullTerminatedSymbolName(OS, Block.Name); // Name
endSymbolRecord(RecordEnd);
@@ -3181,6 +3171,11 @@ void CodeViewDebug::collectGlobalVariableInfo() {
for (const auto *GVE : CU->getGlobalVariables()) {
const DIGlobalVariable *DIGV = GVE->getVariable();
const DIExpression *DIE = GVE->getExpression();
+    // Don't emit string literals in CodeView: the only generally useful parts
+    // are the filename and line number, and CodeView has no way to attach
+    // those to a global. String literals should be the only unnamed
+    // GlobalVariable with debug info.
+ if (DIGV->getName().empty()) continue;
if ((DIE->getNumElements() == 2) &&
(DIE->getElement(0) == dwarf::DW_OP_plus_uconst))
@@ -3380,10 +3375,10 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
if (CVGlobalVariableOffsets.find(DIGV) != CVGlobalVariableOffsets.end())
// Use the offset seen while collecting info on globals.
Offset = CVGlobalVariableOffsets[DIGV];
- OS.EmitCOFFSecRel32(GVSym, Offset);
+ OS.emitCOFFSecRel32(GVSym, Offset);
OS.AddComment("Segment");
- OS.EmitCOFFSectionIndex(GVSym);
+ OS.emitCOFFSectionIndex(GVSym);
OS.AddComment("Name");
const unsigned LengthOfDataRecord = 12;
emitNullTerminatedSymbolName(OS, QualifiedName, LengthOfDataRecord);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index d1fc3cdccb20..16f0082723ed 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -50,18 +50,8 @@ class MachineFunction;
/// Collects and handles line tables information in a CodeView format.
class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
- MCStreamer &OS;
- BumpPtrAllocator Allocator;
- codeview::GlobalTypeTableBuilder TypeTable;
-
- /// Whether to emit type record hashes into .debug$H.
- bool EmitDebugGlobalHashes = false;
-
- /// The codeview CPU type used by the translation unit.
- codeview::CPUType TheCPU;
-
- /// Represents the most general definition range.
- struct LocalVarDefRange {
+public:
+ struct LocalVarDef {
/// Indicates that variable data is stored in memory relative to the
/// specified register.
int InMemory : 1;
@@ -79,23 +69,40 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
/// location containing the data.
uint16_t CVRegister;
- /// Compares all location fields. This includes all fields except the label
- /// ranges.
- bool isDifferentLocation(LocalVarDefRange &O) {
- return InMemory != O.InMemory || DataOffset != O.DataOffset ||
- IsSubfield != O.IsSubfield || StructOffset != O.StructOffset ||
- CVRegister != O.CVRegister;
+    static uint64_t toOpaqueValue(const LocalVarDef DR) {
+ uint64_t Val = 0;
+ std::memcpy(&Val, &DR, sizeof(Val));
+ return Val;
}
- SmallVector<std::pair<const MCSymbol *, const MCSymbol *>, 1> Ranges;
+    static LocalVarDef createFromOpaqueValue(uint64_t Val) {
+ LocalVarDef DR;
+ std::memcpy(&DR, &Val, sizeof(Val));
+ return DR;
+ }
};
- static LocalVarDefRange createDefRangeMem(uint16_t CVRegister, int Offset);
+ static_assert(sizeof(uint64_t) == sizeof(LocalVarDef), "");
+
+private:
+ MCStreamer &OS;
+ BumpPtrAllocator Allocator;
+ codeview::GlobalTypeTableBuilder TypeTable;
+
+ /// Whether to emit type record hashes into .debug$H.
+ bool EmitDebugGlobalHashes = false;
+
+ /// The codeview CPU type used by the translation unit.
+ codeview::CPUType TheCPU;
+
+ static LocalVarDef createDefRangeMem(uint16_t CVRegister, int Offset);
/// Similar to DbgVariable in DwarfDebug, but not dwarf-specific.
struct LocalVariable {
const DILocalVariable *DIVar = nullptr;
- SmallVector<LocalVarDefRange, 1> DefRanges;
+ MapVector<LocalVarDef,
+ SmallVector<std::pair<const MCSymbol *, const MCSymbol *>, 1>>
+ DefRanges;
bool UseReferenceType = false;
};
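Keying DefRanges by LocalVarDef in a MapVector merges ranges for an identical location even when they are discovered out of order, while keeping insertion order, and therefore the emitted def-range order, deterministic. A minimal sketch of the container behavior this relies on (key type illustrative):

    #include "llvm/ADT/MapVector.h"
    #include <cstdint>

    llvm::MapVector<uint64_t, int> M;
    M[42] += 1; // operator[] default-constructs the value on first use
    M[7] += 1;
    M[42] += 1; // reuses the existing entry for key 42
    // Iteration visits key 42 then key 7: insertion order, not key order.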
@@ -493,6 +500,27 @@ public:
void beginInstruction(const MachineInstr *MI) override;
};
+template <> struct DenseMapInfo<CodeViewDebug::LocalVarDef> {
+
+ static inline CodeViewDebug::LocalVarDef getEmptyKey() {
+ return CodeViewDebug::LocalVarDef::createFromOpaqueValue(~0ULL);
+ }
+
+ static inline CodeViewDebug::LocalVarDef getTombstoneKey() {
+ return CodeViewDebug::LocalVarDef::createFromOpaqueValue(~0ULL - 1ULL);
+ }
+
+ static unsigned getHashValue(const CodeViewDebug::LocalVarDef &DR) {
+ return CodeViewDebug::LocalVarDef::toOpaqueValue(DR) * 37ULL;
+ }
+
+ static bool isEqual(const CodeViewDebug::LocalVarDef &LHS,
+ const CodeViewDebug::LocalVarDef &RHS) {
+ return CodeViewDebug::LocalVarDef::toOpaqueValue(LHS) ==
+ CodeViewDebug::LocalVarDef::toOpaqueValue(RHS);
+ }
+};
+
} // end namespace llvm
#endif // LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H
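The header's key trick is treating the 8-byte POD LocalVarDef as a plain uint64_t for hashing, equality, and the empty/tombstone sentinels. A generic sketch of that memcpy round-trip under the same static_assert-ed size constraint (Key, toOpaque, fromOpaque are illustrative names):

    #include <cstdint>
    #include <cstring>

    struct Key { int32_t A; uint32_t B; };
    static_assert(sizeof(Key) == sizeof(uint64_t), "Key must pack to 64 bits");

    static uint64_t toOpaque(Key K) {
      uint64_t V = 0;
      std::memcpy(&V, &K, sizeof(V)); // bit-copy sidesteps aliasing UB
      return V;
    }

    static Key fromOpaque(uint64_t V) {
      Key K;
      std::memcpy(&K, &V, sizeof(V)); // exact inverse: same 64 bits back
      return K;
    }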
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index 396322c4979d..617ddbd66e4e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -13,21 +13,15 @@
#include "llvm/CodeGen/DIE.h"
#include "DwarfCompileUnit.h"
#include "DwarfDebug.h"
-#include "DwarfUnit.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/LEB128.h"
-#include "llvm/Support/MD5.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -170,7 +164,7 @@ DIEAbbrev &DIEAbbrevSet::uniqueAbbreviation(DIE &Die) {
void DIEAbbrevSet::Emit(const AsmPrinter *AP, MCSection *Section) const {
if (!Abbreviations.empty()) {
// Start the debug abbrev section.
- AP->OutStreamer->SwitchSection(Section);
+ AP->OutStreamer->switchSection(Section);
AP->emitDwarfAbbrevs(Abbreviations);
}
}
@@ -204,6 +198,7 @@ const DIE *DIE::getUnitDie() const {
const DIE *p = this;
while (p) {
if (p->getTag() == dwarf::DW_TAG_compile_unit ||
+ p->getTag() == dwarf::DW_TAG_skeleton_unit ||
p->getTag() == dwarf::DW_TAG_type_unit)
return p;
p = p->getParent();
@@ -378,7 +373,7 @@ void DIEInteger::emitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
case dwarf::DW_FORM_flag_present:
// Emit something to keep the lines and comments in sync.
// FIXME: Is there a better way to do this?
- Asm->OutStreamer->AddBlankLine();
+ Asm->OutStreamer->addBlankLine();
return;
case dwarf::DW_FORM_flag:
case dwarf::DW_FORM_ref1:
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
index e175854f7b93..5da50d7aab9f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -19,7 +19,6 @@
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Endian.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
index dd795079ac1a..1358f4d25990 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
-#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
@@ -204,7 +203,7 @@ void DbgValueHistoryMap::trimLocationRanges(
if (auto R = intersects(StartMI, EndMI, ScopeRanges, Ordering)) {
// Adjust ScopeRanges to exclude ranges which subsequent location ranges
// cannot possibly intersect.
- ScopeRanges = ArrayRef<InsnRange>(R.getValue(), ScopeRanges.end());
+ ScopeRanges = ArrayRef<InsnRange>(*R, ScopeRanges.end());
} else {
// If the location range does not intersect any scope range then the
       // DBG_VALUE which opened this location range is useless, mark it for
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 18fc46c74eb4..660a064687d3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -13,7 +13,6 @@
#include "llvm/CodeGen/DebugHandlerBase.h"
#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 63343d2519f9..5f187acf13dc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -11,23 +11,13 @@
//===----------------------------------------------------------------------===//
#include "DwarfException.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Mangler.h"
-#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MachineLocation.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -53,7 +43,7 @@ void DwarfCFIExceptionBase::endFragment() {
DwarfCFIException::DwarfCFIException(AsmPrinter *A)
: DwarfCFIExceptionBase(A) {}
-DwarfCFIException::~DwarfCFIException() {}
+DwarfCFIException::~DwarfCFIException() = default;
/// endModule - Emit all exception information that should come after the
/// content.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 5913c687db48..b3f99d346faa 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -21,7 +21,6 @@
#include "llvm/CodeGen/DIE.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -67,13 +66,13 @@ DwarfCompileUnit::DwarfCompileUnit(unsigned UID, const DICompileUnit *Node,
/// DW_FORM_addr or DW_FORM_GNU_addr_index.
void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Label) {
+ if ((Skeleton || !DD->useSplitDwarf()) && Label)
+ DD->addArangeLabel(SymbolCU(this, Label));
+
// Don't use the address pool in non-fission or in the skeleton unit itself.
if ((!DD->useSplitDwarf() || !Skeleton) && DD->getDwarfVersion() < 5)
return addLocalLabelAddress(Die, Attribute, Label);
- if (Label)
- DD->addArangeLabel(SymbolCU(this, Label));
-
bool UseAddrOffsetFormOrExpressions =
DD->useAddrOffsetForm() || DD->useAddrOffsetExpressions();
@@ -109,9 +108,6 @@ void DwarfCompileUnit::addLocalLabelAddress(DIE &Die,
dwarf::Attribute Attribute,
const MCSymbol *Label) {
if (Label)
- DD->addArangeLabel(SymbolCU(this, Label));
-
- if (Label)
addAttribute(Die, Attribute, dwarf::DW_FORM_addr, DIELabel(Label));
else
addAttribute(Die, Attribute, dwarf::DW_FORM_addr, DIEInteger(0));
@@ -169,7 +165,9 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
} else {
DeclContext = GV->getScope();
// Add name and type.
- addString(*VariableDIE, dwarf::DW_AT_name, GV->getDisplayName());
+ StringRef DisplayName = GV->getDisplayName();
+ if (!DisplayName.empty())
+ addString(*VariableDIE, dwarf::DW_AT_name, GV->getDisplayName());
if (GTy)
addType(*VariableDIE, GTy);
@@ -303,8 +301,11 @@ void DwarfCompileUnit::addLocationAttribute(
DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
: dwarf::DW_OP_form_tls_address);
}
- } else if (Asm->TM.getRelocationModel() == Reloc::RWPI ||
- Asm->TM.getRelocationModel() == Reloc::ROPI_RWPI) {
+ } else if ((Asm->TM.getRelocationModel() == Reloc::RWPI ||
+ Asm->TM.getRelocationModel() == Reloc::ROPI_RWPI) &&
+ !Asm->getObjFileLowering()
+ .getKindForGlobal(Global, Asm->TM)
+ .isReadOnly()) {
auto FormAndOp = GetPointerSizedFormAndOp();
// Constant
addUInt(*Loc, dwarf::DW_FORM_data1, FormAndOp.Op);
@@ -505,7 +506,7 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
// FIXME: when writing dwo, we need to avoid relocations. Probably
// the "right" solution is to treat globals the way func and data
// symbols are (with entries in .debug_addr).
- // For now, since we only ever use index 0, this should work as-is.
+ // For now, since we only ever use index 0, this should work as-is.
addUInt(*Loc, dwarf::DW_FORM_data4, FrameBase.Location.WasmLoc.Index);
}
addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index f2e1f6346803..61412cde34c8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -25,7 +25,6 @@
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/Support/Casting.h"
-#include <algorithm>
#include <cassert>
#include <cstdint>
#include <memory>
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 609b568f28be..866338a949f3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -31,8 +31,8 @@
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
+#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
@@ -45,14 +45,11 @@
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/MC/SectionKind.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MD5.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
@@ -360,7 +357,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A)
DebuggerTuning = Asm->TM.Options.DebuggerTuning;
else if (IsDarwin)
DebuggerTuning = DebuggerKind::LLDB;
- else if (TT.isPS4CPU())
+ else if (TT.isPS())
DebuggerTuning = DebuggerKind::SCE;
else if (TT.isOSAIX())
DebuggerTuning = DebuggerKind::DBX;
@@ -2315,7 +2312,7 @@ void DwarfDebug::emitStringOffsetsTableHeader() {
template <typename AccelTableT>
void DwarfDebug::emitAccel(AccelTableT &Accel, MCSection *Section,
StringRef TableName) {
- Asm->OutStreamer->SwitchSection(Section);
+ Asm->OutStreamer->switchSection(Section);
// Emit the full data.
emitAppleAccelTable(Asm, Accel, TableName, Section->getBeginSymbol());
@@ -2434,12 +2431,12 @@ void DwarfDebug::emitDebugPubSections() {
bool GnuStyle = TheU->getCUNode()->getNameTableKind() ==
DICompileUnit::DebugNameTableKind::GNU;
- Asm->OutStreamer->SwitchSection(
+ Asm->OutStreamer->switchSection(
GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection()
: Asm->getObjFileLowering().getDwarfPubNamesSection());
emitDebugPubSection(GnuStyle, "Names", TheU, TheU->getGlobalNames());
- Asm->OutStreamer->SwitchSection(
+ Asm->OutStreamer->switchSection(
GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubTypesSection()
: Asm->getObjFileLowering().getDwarfPubTypesSection());
emitDebugPubSection(GnuStyle, "Types", TheU, TheU->getGlobalTypes());
@@ -2849,7 +2846,7 @@ void DwarfDebug::emitDebugLocImpl(MCSection *Sec) {
if (DebugLocs.getLists().empty())
return;
- Asm->OutStreamer->SwitchSection(Sec);
+ Asm->OutStreamer->switchSection(Sec);
MCSymbol *TableEnd = nullptr;
if (getDwarfVersion() >= 5)
@@ -2880,7 +2877,7 @@ void DwarfDebug::emitDebugLocDWO() {
}
for (const auto &List : DebugLocs.getLists()) {
- Asm->OutStreamer->SwitchSection(
+ Asm->OutStreamer->switchSection(
Asm->getObjFileLowering().getDwarfLocDWOSection());
Asm->OutStreamer->emitLabel(List.Label);
@@ -2953,8 +2950,8 @@ void DwarfDebug::emitDebugARanges() {
// Sort the symbols by offset within the section.
llvm::stable_sort(List, [&](const SymbolCU &A, const SymbolCU &B) {
- unsigned IA = A.Sym ? Asm->OutStreamer->GetSymbolOrder(A.Sym) : 0;
- unsigned IB = B.Sym ? Asm->OutStreamer->GetSymbolOrder(B.Sym) : 0;
+ unsigned IA = A.Sym ? Asm->OutStreamer->getSymbolOrder(A.Sym) : 0;
+ unsigned IB = B.Sym ? Asm->OutStreamer->getSymbolOrder(B.Sym) : 0;
// Symbols with no order assigned should be placed at the end.
// (e.g. section end labels)
@@ -2987,7 +2984,7 @@ void DwarfDebug::emitDebugARanges() {
}
// Start the dwarf aranges section.
- Asm->OutStreamer->SwitchSection(
+ Asm->OutStreamer->switchSection(
Asm->getObjFileLowering().getDwarfARangesSection());
unsigned PtrSize = Asm->MAI->getCodePointerSize();
@@ -3045,15 +3042,22 @@ void DwarfDebug::emitDebugARanges() {
for (const ArangeSpan &Span : List) {
Asm->emitLabelReference(Span.Start, PtrSize);
- // Calculate the size as being from the span start to it's end.
- if (Span.End) {
+ // Calculate the size as being from the span start to its end.
+ //
+ // If the size is zero, then round it up to one byte. The DWARF
+ // specification requires that entries in this table have nonzero
+ // lengths.
+ auto SizeRef = SymSize.find(Span.Start);
+ if ((SizeRef == SymSize.end() || SizeRef->second != 0) && Span.End) {
Asm->emitLabelDifference(Span.End, Span.Start, PtrSize);
} else {
// For symbols without an end marker (e.g. common), we
// write a single arange entry containing just that one symbol.
- uint64_t Size = SymSize[Span.Start];
- if (Size == 0)
+ uint64_t Size;
+ if (SizeRef == SymSize.end() || SizeRef->second == 0)
Size = 1;
+ else
+ Size = SizeRef->second;
Asm->OutStreamer->emitIntValue(Size, PtrSize);
}
@@ -3087,7 +3091,7 @@ void DwarfDebug::emitDebugRangesImpl(const DwarfFile &Holder, MCSection *Section
return !Pair.second->getCUNode()->isDebugDirectivesOnly();
}));
- Asm->OutStreamer->SwitchSection(Section);
+ Asm->OutStreamer->switchSection(Section);
MCSymbol *TableEnd = nullptr;
if (getDwarfVersion() >= 5)
@@ -3239,7 +3243,7 @@ void DwarfDebug::emitDebugMacinfoImpl(MCSection *Section) {
DIMacroNodeArray Macros = CUNode->getMacros();
if (Macros.empty())
continue;
- Asm->OutStreamer->SwitchSection(Section);
+ Asm->OutStreamer->switchSection(Section);
Asm->OutStreamer->emitLabel(U.getMacroLabelBegin());
if (UseDebugMacroSection)
emitMacroHeader(Asm, *this, U, getDwarfVersion());
@@ -3447,22 +3451,6 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
CU.addDIETypeSignature(RefDie, Signature);
}
-DwarfDebug::NonTypeUnitContext::NonTypeUnitContext(DwarfDebug *DD)
- : DD(DD),
- TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)), AddrPoolUsed(DD->AddrPool.hasBeenUsed()) {
- DD->TypeUnitsUnderConstruction.clear();
- DD->AddrPool.resetUsedFlag();
-}
-
-DwarfDebug::NonTypeUnitContext::~NonTypeUnitContext() {
- DD->TypeUnitsUnderConstruction = std::move(TypeUnitsUnderConstruction);
- DD->AddrPool.resetUsedFlag(AddrPoolUsed);
-}
-
-DwarfDebug::NonTypeUnitContext DwarfDebug::enterNonTypeUnitContext() {
- return NonTypeUnitContext(this);
-}
-
// Add the Name along with its companion DIE to the appropriate accelerator
// table (for AccelTableKind::Dwarf it's always AccelDebugNames, for
// AccelTableKind::Apple, we use the table we got as an argument). If
@@ -3555,6 +3543,6 @@ Optional<MD5::MD5Result> DwarfDebug::getMD5AsBytes(const DIFile *File) const {
// An MD5 checksum is 16 bytes.
std::string ChecksumString = fromHex(Checksum->Value);
MD5::MD5Result CKMem;
- std::copy(ChecksumString.begin(), ChecksumString.end(), CKMem.Bytes.data());
+ std::copy(ChecksumString.begin(), ChecksumString.end(), CKMem.data());
return CKMem;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 4e1a1b1e068d..31e4081b7141 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -14,14 +14,13 @@
#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H
#include "AddressPool.h"
-#include "DebugLocStream.h"
#include "DebugLocEntry.h"
+#include "DebugLocStream.h"
#include "DwarfFile.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -31,7 +30,6 @@
#include "llvm/CodeGen/AccelTable.h"
#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
#include "llvm/CodeGen/DebugHandlerBase.h"
-#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Metadata.h"
@@ -80,7 +78,7 @@ private:
public:
DbgEntity(const DINode *N, const DILocation *IA, DbgEntityKind ID)
: Entity(N), InlinedAt(IA), SubclassID(ID) {}
- virtual ~DbgEntity() {}
+ virtual ~DbgEntity() = default;
/// Accessors.
/// @{
@@ -667,19 +665,6 @@ public:
void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier,
DIE &Die, const DICompositeType *CTy);
- class NonTypeUnitContext {
- DwarfDebug *DD;
- decltype(DwarfDebug::TypeUnitsUnderConstruction) TypeUnitsUnderConstruction;
- bool AddrPoolUsed;
- friend class DwarfDebug;
- NonTypeUnitContext(DwarfDebug *DD);
- public:
- NonTypeUnitContext(NonTypeUnitContext&&) = default;
- ~NonTypeUnitContext();
- };
-
- NonTypeUnitContext enterNonTypeUnitContext();
-
/// Add a label so that arange data can be generated for it.
void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index fe438102ee98..1c21d5ee8bb1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -329,7 +329,16 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
return false;
}
- assert(DwarfRegs.size() == 1);
+  // TODO: We should not give up here; the code below first needs to be
+  // changed to handle multiple (sub)registers.
+ if (DwarfRegs.size() > 1) {
+ LLVM_DEBUG(dbgs() << "TODO: giving up on debug information due to "
+ "multi-register usage.\n");
+ DwarfRegs.clear();
+ LocationKind = Unknown;
+ return false;
+ }
+
auto Reg = DwarfRegs[0];
bool FBReg = isFrameRegister(TRI, MachineReg);
int SignedOffset = 0;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index a67d0f032cf6..a497aa07284e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -12,9 +12,7 @@
#include "DwarfUnit.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/MC/MCStreamer.h"
-#include <algorithm>
#include <cstdint>
using namespace llvm;
@@ -47,7 +45,7 @@ void DwarfFile::emitUnit(DwarfUnit *TheU, bool UseOffsets) {
if (llvm::empty(TheU->getUnitDie().values()))
return;
- Asm->OutStreamer->SwitchSection(S);
+ Asm->OutStreamer->switchSection(S);
TheU->emitHeader(UseOffsets);
Asm->emitDwarfDIE(TheU->getUnitDie());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
index a876f8ccace9..67b72f0b455d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
@@ -39,7 +39,7 @@ DwarfStringPool::getEntryImpl(AsmPrinter &Asm, StringRef Str) {
DwarfStringPool::EntryRef DwarfStringPool::getEntry(AsmPrinter &Asm,
StringRef Str) {
auto &MapEntry = getEntryImpl(Asm, Str);
- return EntryRef(MapEntry, false);
+ return EntryRef(MapEntry);
}
DwarfStringPool::EntryRef DwarfStringPool::getIndexedEntry(AsmPrinter &Asm,
@@ -47,7 +47,7 @@ DwarfStringPool::EntryRef DwarfStringPool::getIndexedEntry(AsmPrinter &Asm,
auto &MapEntry = getEntryImpl(Asm, Str);
if (!MapEntry.getValue().isIndexed())
MapEntry.getValue().Index = NumIndexedStrings++;
- return EntryRef(MapEntry, true);
+ return EntryRef(MapEntry);
}
void DwarfStringPool::emitStringOffsetsTableHeader(AsmPrinter &Asm,
@@ -55,7 +55,7 @@ void DwarfStringPool::emitStringOffsetsTableHeader(AsmPrinter &Asm,
MCSymbol *StartSym) {
if (getNumIndexedStrings() == 0)
return;
- Asm.OutStreamer->SwitchSection(Section);
+ Asm.OutStreamer->switchSection(Section);
unsigned EntrySize = Asm.getDwarfOffsetByteSize();
// We are emitting the header for a contribution to the string offsets
// table. The header consists of an entry with the contribution's
@@ -78,7 +78,7 @@ void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection,
return;
// Start the dwarf str section.
- Asm.OutStreamer->SwitchSection(StrSection);
+ Asm.OutStreamer->switchSection(StrSection);
// Get all of the string pool entries and sort them by their offset.
SmallVector<const StringMapEntry<EntryTy> *, 64> Entries;
@@ -117,7 +117,7 @@ void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection,
Entries[Entry.getValue().Index] = &Entry;
}
- Asm.OutStreamer->SwitchSection(OffsetSection);
+ Asm.OutStreamer->switchSection(OffsetSection);
unsigned size = Asm.getDwarfOffsetByteSize();
for (const auto &Entry : Entries)
if (UseRelativeOffsets)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 5a2bd479f277..81238b0fe0d2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -17,12 +17,8 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/None.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/iterator_range.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalValue.h"
@@ -32,9 +28,7 @@
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include <cassert>
#include <cstdint>
@@ -380,6 +374,8 @@ void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute,
CU = getUnitDie().getUnit();
if (!EntryCU)
EntryCU = getUnitDie().getUnit();
+ assert(EntryCU == CU || !DD->useSplitDwarf() || DD->shareAcrossDWOCUs() ||
+ !static_cast<const DwarfUnit*>(CU)->isDwoUnit());
addAttribute(Die, Attribute,
EntryCU == CU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr,
Entry);
@@ -596,10 +592,8 @@ DIE *DwarfUnit::createTypeDIE(const DIScope *Context, DIE &ContextDIE,
// Skip updating the accelerator tables since this is not the full type.
if (MDString *TypeId = CTy->getRawIdentifier())
DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy);
- else {
- auto X = DD->enterNonTypeUnitContext();
+ else
finishNonUnitTypeDIE(TyDIE, CTy);
- }
return &TyDIE;
}
constructTypeDIE(TyDIE, CTy);
@@ -805,7 +799,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
// or reference types.
if (DTy->getDWARFAddressSpace())
addUInt(Buffer, dwarf::DW_AT_address_class, dwarf::DW_FORM_data4,
- DTy->getDWARFAddressSpace().getValue());
+ *DTy->getDWARFAddressSpace());
}
void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args) {
@@ -1350,6 +1344,9 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
if (SP->isRecursive())
addFlag(SPDie, dwarf::DW_AT_recursive);
+ if (!SP->getTargetFuncName().empty())
+ addString(SPDie, dwarf::DW_AT_trampoline, SP->getTargetFuncName());
+
if (DD->getDwarfVersion() >= 5 && SP->isDeleted())
addFlag(SPDie, dwarf::DW_AT_deleted);
}
@@ -1442,7 +1439,8 @@ DIE *DwarfUnit::getIndexTyDie() {
addString(*IndexTyDie, dwarf::DW_AT_name, Name);
addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, None, sizeof(int64_t));
addUInt(*IndexTyDie, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
- dwarf::DW_ATE_unsigned);
+ dwarf::getArrayIndexTypeEncoding(
+ (dwarf::SourceLanguage)getLanguage()));
DD->addAccelType(*CUNode, Name, *IndexTyDie, /*Flags*/ 0);
return IndexTyDie;
}
@@ -1847,11 +1845,5 @@ void DwarfUnit::addRnglistsBase() {
}
void DwarfTypeUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) {
- addFlag(D, dwarf::DW_AT_declaration);
- StringRef Name = CTy->getName();
- if (!Name.empty())
- addString(D, dwarf::DW_AT_name, Name);
- if (Name.startswith("_STN") || !Name.contains('<'))
- addTemplateParams(D, CTy->getTemplateParams());
- getCU().createTypeDIE(CTy);
+ DD->getAddressPool().resetUsedFlag(true);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 39f40b172c1b..31644959bdca 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -19,7 +19,6 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -458,7 +457,7 @@ MCSymbol *EHStreamer::emitExceptionTable() {
// Sometimes we want not to emit the data into separate section (e.g. ARM
// EHABI). In this case LSDASection will be NULL.
if (LSDASection)
- Asm->OutStreamer->SwitchSection(LSDASection);
+ Asm->OutStreamer->switchSection(LSDASection);
Asm->emitAlignment(Align(4));
// Emit the LSDA.
@@ -806,7 +805,7 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) {
// Emit the Catch TypeInfos.
if (VerboseAsm && !TypeInfos.empty()) {
Asm->OutStreamer->AddComment(">> Catch TypeInfos <<");
- Asm->OutStreamer->AddBlankLine();
+ Asm->OutStreamer->addBlankLine();
Entry = TypeInfos.size();
}
@@ -821,7 +820,7 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) {
// Emit the Exception Specifications.
if (VerboseAsm && !FilterIds.empty()) {
Asm->OutStreamer->AddComment(">> Filter TypeInfos <<");
- Asm->OutStreamer->AddBlankLine();
+ Asm->OutStreamer->addBlankLine();
Entry = 0;
}
for (std::vector<unsigned>::const_iterator
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
index 70777f07fc6c..62fd15d89512 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -23,7 +23,6 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
using namespace llvm;
@@ -46,9 +45,8 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
unsigned IntPtrSize = M.getDataLayout().getPointerSize();
// Put this in a custom .note section.
- OS.SwitchSection(
- AP.getObjFileLowering().getContext().getELFSection(".note.gc",
- ELF::SHT_PROGBITS, 0));
+ OS.switchSection(AP.getObjFileLowering().getContext().getELFSection(
+ ".note.gc", ELF::SHT_PROGBITS, 0));
// For each function...
for (GCModuleInfo::FuncInfoVec::iterator FI = Info.funcinfo_begin(),
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 3ade262d9af2..74fa30ab321b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -72,10 +72,10 @@ static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) {
void OcamlGCMetadataPrinter::beginAssembly(Module &M, GCModuleInfo &Info,
AsmPrinter &AP) {
- AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getTextSection());
+ AP.OutStreamer->switchSection(AP.getObjFileLowering().getTextSection());
EmitCamlGlobal(M, AP, "code_begin");
- AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getDataSection());
+ AP.OutStreamer->switchSection(AP.getObjFileLowering().getDataSection());
EmitCamlGlobal(M, AP, "data_begin");
}
@@ -99,16 +99,16 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
AsmPrinter &AP) {
unsigned IntPtrSize = M.getDataLayout().getPointerSize();
- AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getTextSection());
+ AP.OutStreamer->switchSection(AP.getObjFileLowering().getTextSection());
EmitCamlGlobal(M, AP, "code_end");
- AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getDataSection());
+ AP.OutStreamer->switchSection(AP.getObjFileLowering().getDataSection());
EmitCamlGlobal(M, AP, "data_end");
// FIXME: Why does ocaml emit this??
AP.OutStreamer->emitIntValue(0, IntPtrSize);
- AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getDataSection());
+ AP.OutStreamer->switchSection(AP.getObjFileLowering().getDataSection());
EmitCamlGlobal(M, AP, "frametable");
int NumDescriptors = 0;
@@ -147,7 +147,7 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
AP.OutStreamer->AddComment("live roots for " +
Twine(FI->getFunction().getName()));
- AP.OutStreamer->AddBlankLine();
+ AP.OutStreamer->addBlankLine();
for (GCFunctionInfo::iterator J = FI->begin(), JE = FI->end(); J != JE;
++J) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
index bab187f46535..135eabc34838 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
@@ -13,7 +13,7 @@
#include "PseudoProbePrinter.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/Module.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/PseudoProbe.h"
#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/MC/MCStreamer.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
index a17a2ca2790e..a514ff161cee 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "WasmException.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.h
index f06de786bd76..2abbe37cb6d9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.h
@@ -15,9 +15,12 @@
#define LLVM_LIB_CODEGEN_ASMPRINTER_WASMEXCEPTION_H
#include "EHStreamer.h"
-#include "llvm/CodeGen/AsmPrinter.h"
namespace llvm {
+class AsmPrinter;
+class MachineFunction;
+struct LandingPadInfo;
+template <typename T> class SmallVectorImpl;
class LLVM_LIBRARY_VISIBILITY WasmException : public EHStreamer {
public:
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
index ad8432343a60..5d813b72c0b7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
@@ -15,11 +15,8 @@
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCStreamer.h"
@@ -29,7 +26,7 @@ using namespace llvm;
WinCFGuard::WinCFGuard(AsmPrinter *A) : Asm(A) {}
-WinCFGuard::~WinCFGuard() {}
+WinCFGuard::~WinCFGuard() = default;
void WinCFGuard::endFunction(const MachineFunction *MF) {
@@ -110,19 +107,19 @@ void WinCFGuard::endModule() {
// Emit the symbol index of each GFIDs entry to form the .gfids section.
auto &OS = *Asm->OutStreamer;
- OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGFIDsSection());
+ OS.switchSection(Asm->OutContext.getObjectFileInfo()->getGFIDsSection());
for (const MCSymbol *S : GFIDsEntries)
- OS.EmitCOFFSymbolIndex(S);
+ OS.emitCOFFSymbolIndex(S);
// Emit the symbol index of each GIATs entry to form the .giats section.
- OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGIATsSection());
+ OS.switchSection(Asm->OutContext.getObjectFileInfo()->getGIATsSection());
for (const MCSymbol *S : GIATsEntries) {
- OS.EmitCOFFSymbolIndex(S);
+ OS.emitCOFFSymbolIndex(S);
}
// Emit the symbol index of each longjmp target to form the .gljmp section.
- OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGLJMPSection());
+ OS.switchSection(Asm->OutContext.getObjectFileInfo()->getGLJMPSection());
for (const MCSymbol *S : LongjmpTargets) {
- OS.EmitCOFFSymbolIndex(S);
+ OS.emitCOFFSymbolIndex(S);
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
index ef57031c7294..c3ca9c92bf71 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -23,19 +23,13 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
using namespace llvm;
WinException::WinException(AsmPrinter *A) : EHStreamer(A) {
@@ -46,7 +40,7 @@ WinException::WinException(AsmPrinter *A) : EHStreamer(A) {
isThumb = Asm->TM.getTargetTriple().isThumb();
}
-WinException::~WinException() {}
+WinException::~WinException() = default;
/// endModule - Emit all exception information that should come after the
/// content.
@@ -55,13 +49,13 @@ void WinException::endModule() {
const Module *M = MMI->getModule();
for (const Function &F : *M)
if (F.hasFnAttribute("safeseh"))
- OS.EmitCOFFSafeSEH(Asm->getSymbol(&F));
+ OS.emitCOFFSafeSEH(Asm->getSymbol(&F));
if (M->getModuleFlag("ehcontguard") && !EHContTargets.empty()) {
// Emit the symbol index of each ehcont target.
- OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGEHContSection());
+ OS.switchSection(Asm->OutContext.getObjectFileInfo()->getGEHContSection());
for (const MCSymbol *S : EHContTargets) {
- OS.EmitCOFFSymbolIndex(S);
+ OS.emitCOFFSymbolIndex(S);
}
}
}
@@ -122,7 +116,7 @@ void WinException::beginFunction(const MachineFunction *MF) {
void WinException::markFunctionEnd() {
if (isAArch64 && CurrentFuncletEntry &&
(shouldEmitMoves || shouldEmitPersonality))
- Asm->OutStreamer->EmitWinCFIFuncletOrFuncEnd();
+ Asm->OutStreamer->emitWinCFIFuncletOrFuncEnd();
}
/// endFunction - Gather and emit post-function exception information.
@@ -151,12 +145,12 @@ void WinException::endFunction(const MachineFunction *MF) {
return;
if (shouldEmitPersonality || shouldEmitLSDA) {
- Asm->OutStreamer->PushSection();
+ Asm->OutStreamer->pushSection();
// Just switch sections to the right xdata section.
MCSection *XData = Asm->OutStreamer->getAssociatedXDataSection(
Asm->OutStreamer->getCurrentSectionOnly());
- Asm->OutStreamer->SwitchSection(XData);
+ Asm->OutStreamer->switchSection(XData);
// Emit the tables appropriate to the personality function in use. If we
// don't recognize the personality, assume it uses an Itanium-style LSDA.
@@ -171,7 +165,7 @@ void WinException::endFunction(const MachineFunction *MF) {
else
emitExceptionTable();
- Asm->OutStreamer->PopSection();
+ Asm->OutStreamer->popSection();
}
if (!MF->getCatchretTargets().empty()) {
@@ -211,11 +205,11 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB,
Sym = getMCSymbolForMBB(Asm, &MBB);
// Describe our funclet symbol as a function with internal linkage.
- Asm->OutStreamer->BeginCOFFSymbolDef(Sym);
- Asm->OutStreamer->EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC);
- Asm->OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+ Asm->OutStreamer->beginCOFFSymbolDef(Sym);
+ Asm->OutStreamer->emitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC);
+ Asm->OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
<< COFF::SCT_COMPLEX_TYPE_SHIFT);
- Asm->OutStreamer->EndCOFFSymbolDef();
+ Asm->OutStreamer->endCOFFSymbolDef();
// We want our funclet's entry point to be aligned such that no nops will be
// present after the label.
@@ -229,7 +223,7 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB,
// Mark 'Sym' as starting our funclet.
if (shouldEmitMoves || shouldEmitPersonality) {
CurrentFuncletTextSection = Asm->OutStreamer->getCurrentSectionOnly();
- Asm->OutStreamer->EmitWinCFIStartProc(Sym);
+ Asm->OutStreamer->emitWinCFIStartProc(Sym);
}
if (shouldEmitPersonality) {
@@ -248,15 +242,15 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB,
// inliner doesn't allow inlining them, this isn't a major problem in
// practice.
if (!CurrentFuncletEntry->isCleanupFuncletEntry())
- Asm->OutStreamer->EmitWinEHHandler(PersHandlerSym, true, true);
+ Asm->OutStreamer->emitWinEHHandler(PersHandlerSym, true, true);
}
}
void WinException::endFunclet() {
if (isAArch64 && CurrentFuncletEntry &&
(shouldEmitMoves || shouldEmitPersonality)) {
- Asm->OutStreamer->SwitchSection(CurrentFuncletTextSection);
- Asm->OutStreamer->EmitWinCFIFuncletOrFuncEnd();
+ Asm->OutStreamer->switchSection(CurrentFuncletTextSection);
+ Asm->OutStreamer->emitWinCFIFuncletOrFuncEnd();
}
endFuncletImpl();
}
@@ -276,7 +270,7 @@ void WinException::endFuncletImpl() {
if (Per == EHPersonality::MSVC_CXX && shouldEmitPersonality &&
!CurrentFuncletEntry->isCleanupFuncletEntry()) {
// Emit an UNWIND_INFO struct describing the prologue.
- Asm->OutStreamer->EmitWinEHHandlerData();
+ Asm->OutStreamer->emitWinEHHandlerData();
// If this is a C++ catch funclet (or the parent function),
// emit a reference to the LSDA for the parent function.
@@ -287,14 +281,14 @@ void WinException::endFuncletImpl() {
} else if (Per == EHPersonality::MSVC_TableSEH && MF->hasEHFunclets() &&
!CurrentFuncletEntry->isEHFuncletEntry()) {
// Emit an UNWIND_INFO struct describing the prologue.
- Asm->OutStreamer->EmitWinEHHandlerData();
+ Asm->OutStreamer->emitWinEHHandlerData();
// If this is the parent function in Win64 SEH, emit the LSDA immediately
// following .seh_handlerdata.
emitCSpecificHandlerTable(MF);
} else if (shouldEmitPersonality || shouldEmitLSDA) {
// Emit an UNWIND_INFO struct describing the prologue.
- Asm->OutStreamer->EmitWinEHHandlerData();
+ Asm->OutStreamer->emitWinEHHandlerData();
// In these cases, no further info is written to the .xdata section
// right here, but is written by e.g. emitExceptionTable in endFunction()
// above.
@@ -307,8 +301,8 @@ void WinException::endFuncletImpl() {
// Switch back to the funclet start .text section now that we are done
// writing to .xdata, and emit an .seh_endproc directive to mark the end of
// the function.
- Asm->OutStreamer->SwitchSection(CurrentFuncletTextSection);
- Asm->OutStreamer->EmitWinCFIEndProc();
+ Asm->OutStreamer->switchSection(CurrentFuncletTextSection);
+ Asm->OutStreamer->emitWinCFIEndProc();
}
// Let's make sure we don't try to end the same funclet twice.
@@ -699,7 +693,12 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
}
int UnwindHelpOffset = 0;
- if (Asm->MAI->usesWindowsCFI())
+ // TODO: The check for UnwindHelpFrameIdx against max() below (and the
+ // second check further below) can be removed once MS C++ unwinding is
+ // implemented for ARM, i.e. when test/CodeGen/ARM/Windows/wineh-basic.ll
+ // passes without the check.
+ if (Asm->MAI->usesWindowsCFI() &&
+ FuncInfo.UnwindHelpFrameIdx != std::numeric_limits<int>::max())
UnwindHelpOffset =
getFrameIndexOffset(FuncInfo.UnwindHelpFrameIdx, FuncInfo);
@@ -761,7 +760,8 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
AddComment("IPToStateXData");
OS.emitValue(create32bitRef(IPToStateXData), 4);
- if (Asm->MAI->usesWindowsCFI()) {
+ if (Asm->MAI->usesWindowsCFI() &&
+ FuncInfo.UnwindHelpFrameIdx != std::numeric_limits<int>::max()) {
AddComment("UnwindHelp");
OS.emitInt32(UnwindHelpOffset);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 4838f6da750d..5ce6fbb5f647 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -15,7 +15,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/AtomicExpandUtils.h"
@@ -47,6 +47,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/LowerAtomic.h"
#include <cassert>
#include <cstdint>
#include <iterator>
@@ -57,71 +58,72 @@ using namespace llvm;
namespace {
- class AtomicExpand: public FunctionPass {
- const TargetLowering *TLI = nullptr;
+class AtomicExpand : public FunctionPass {
+ const TargetLowering *TLI = nullptr;
- public:
- static char ID; // Pass identification, replacement for typeid
+public:
+ static char ID; // Pass identification, replacement for typeid
- AtomicExpand() : FunctionPass(ID) {
- initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
- }
+ AtomicExpand() : FunctionPass(ID) {
+ initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
+ }
- bool runOnFunction(Function &F) override;
-
- private:
- bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
- IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
- LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
- bool tryExpandAtomicLoad(LoadInst *LI);
- bool expandAtomicLoadToLL(LoadInst *LI);
- bool expandAtomicLoadToCmpXchg(LoadInst *LI);
- StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
- bool expandAtomicStore(StoreInst *SI);
- bool tryExpandAtomicRMW(AtomicRMWInst *AI);
- AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
- Value *
- insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
- Align AddrAlign, AtomicOrdering MemOpOrder,
- function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
- void expandAtomicOpToLLSC(
- Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
- AtomicOrdering MemOpOrder,
- function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
- void expandPartwordAtomicRMW(
- AtomicRMWInst *I,
- TargetLoweringBase::AtomicExpansionKind ExpansionKind);
- AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
- bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
- void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
- void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
-
- AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
- static Value *insertRMWCmpXchgLoop(
- IRBuilder<> &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
- AtomicOrdering MemOpOrder, SyncScope::ID SSID,
- function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
- CreateCmpXchgInstFun CreateCmpXchg);
- bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
-
- bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
- bool isIdempotentRMW(AtomicRMWInst *RMWI);
- bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
-
- bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
- Value *PointerOperand, Value *ValueOperand,
- Value *CASExpected, AtomicOrdering Ordering,
- AtomicOrdering Ordering2,
- ArrayRef<RTLIB::Libcall> Libcalls);
- void expandAtomicLoadToLibcall(LoadInst *LI);
- void expandAtomicStoreToLibcall(StoreInst *LI);
- void expandAtomicRMWToLibcall(AtomicRMWInst *I);
- void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
-
- friend bool
- llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
- CreateCmpXchgInstFun CreateCmpXchg);
- };
+ bool runOnFunction(Function &F) override;
+
+private:
+ bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
+ IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
+ LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
+ bool tryExpandAtomicLoad(LoadInst *LI);
+ bool expandAtomicLoadToLL(LoadInst *LI);
+ bool expandAtomicLoadToCmpXchg(LoadInst *LI);
+ StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
+ bool tryExpandAtomicStore(StoreInst *SI);
+ void expandAtomicStore(StoreInst *SI);
+ bool tryExpandAtomicRMW(AtomicRMWInst *AI);
+ AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
+ Value *
+ insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
+ Align AddrAlign, AtomicOrdering MemOpOrder,
+ function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
+ void
+ expandAtomicOpToLLSC(Instruction *I, Type *ResultTy, Value *Addr,
+ Align AddrAlign, AtomicOrdering MemOpOrder,
+ function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
+ void expandPartwordAtomicRMW(
+ AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind);
+ AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
+ bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
+ void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
+ void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
+
+ AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
+ static Value *
+ insertRMWCmpXchgLoop(IRBuilder<> &Builder, Type *ResultType, Value *Addr,
+ Align AddrAlign, AtomicOrdering MemOpOrder,
+ SyncScope::ID SSID,
+ function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
+ CreateCmpXchgInstFun CreateCmpXchg);
+ bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
+
+ bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
+ bool isIdempotentRMW(AtomicRMWInst *RMWI);
+ bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
+
+ bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
+ Value *PointerOperand, Value *ValueOperand,
+ Value *CASExpected, AtomicOrdering Ordering,
+ AtomicOrdering Ordering2,
+ ArrayRef<RTLIB::Libcall> Libcalls);
+ void expandAtomicLoadToLibcall(LoadInst *LI);
+ void expandAtomicStoreToLibcall(StoreInst *LI);
+ void expandAtomicRMWToLibcall(AtomicRMWInst *I);
+ void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
+
+ friend bool
+ llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
+ CreateCmpXchgInstFun CreateCmpXchg);
+};
} // end anonymous namespace
@@ -129,8 +131,8 @@ char AtomicExpand::ID = 0;
char &llvm::AtomicExpandID = AtomicExpand::ID;
-INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions",
- false, false)
+INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions", false,
+ false)
FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); }
@@ -252,7 +254,8 @@ bool AtomicExpand::runOnFunction(Function &F) {
}
if (LI) {
- if (LI->getType()->isFloatingPointTy()) {
+ if (TLI->shouldCastAtomicLoadInIR(LI) ==
+ TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
// TODO: add a TLI hook to control this so that each target can
// convert to lowering the original type one at a time.
LI = convertAtomicLoadToIntegerType(LI);
@@ -262,7 +265,8 @@ bool AtomicExpand::runOnFunction(Function &F) {
MadeChange |= tryExpandAtomicLoad(LI);
} else if (SI) {
- if (SI->getValueOperand()->getType()->isFloatingPointTy()) {
+ if (TLI->shouldCastAtomicStoreInIR(SI) ==
+ TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
// TODO: add a TLI hook to control this so that each target can
// convert to lowering the original type one at a time.
SI = convertAtomicStoreToIntegerType(SI);
@@ -271,8 +275,8 @@ bool AtomicExpand::runOnFunction(Function &F) {
MadeChange = true;
}
- if (TLI->shouldExpandAtomicStoreInIR(SI))
- MadeChange |= expandAtomicStore(SI);
+ if (tryExpandAtomicStore(SI))
+ MadeChange = true;
} else if (RMWI) {
// There are two different ways of expanding RMW instructions:
// - into a load if it is idempotent
@@ -283,8 +287,8 @@ bool AtomicExpand::runOnFunction(Function &F) {
MadeChange = true;
} else {
AtomicRMWInst::BinOp Op = RMWI->getOperation();
- if (Op == AtomicRMWInst::Xchg &&
- RMWI->getValOperand()->getType()->isFloatingPointTy()) {
+ if (TLI->shouldCastAtomicRMWIInIR(RMWI) ==
+ TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
// TODO: add a TLI hook to control this so that each target can
// convert to lowering the original type one at a time.
RMWI = convertAtomicXchgToIntegerType(RMWI);
@@ -308,7 +312,7 @@ bool AtomicExpand::runOnFunction(Function &F) {
// extend convertCmpXchgToInteger for floating point too.
assert(!CASI->getCompareOperand()->getType()->isFloatingPointTy() &&
"unimplemented - floating point not legal at IR level");
- if (CASI->getCompareOperand()->getType()->isPointerTy() ) {
+ if (CASI->getCompareOperand()->getType()->isPointerTy()) {
// TODO: add a TLI hook to control this so that each target can
// convert to lowering the original type one at a time.
CASI = convertCmpXchgToIntegerType(CASI);
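These runOnFunction hunks replace the hardcoded rule "floating-point atomics get cast to integer in IR" with per-target TLI hooks queried for an AtomicExpansionKind. A hedged sketch of a target opting back into the old behaviour (the hook name is taken from the hunks above; the override itself is hypothetical):

    // Illustrative override in a target's TargetLowering subclass.
    TargetLoweringBase::AtomicExpansionKind
    shouldCastAtomicLoadInIR(LoadInst *LI) const override {
      // Keep the previous default: FP atomic loads round-trip via integers.
      return LI->getType()->isFloatingPointTy()
                 ? AtomicExpansionKind::CastToInteger
                 : AtomicExpansionKind::None;
    }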
@@ -351,14 +355,12 @@ IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
/// convertAtomicStoreToIntegerType for background.
LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
auto *M = LI->getModule();
- Type *NewTy = getCorrespondingIntegerType(LI->getType(),
- M->getDataLayout());
+ Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());
IRBuilder<> Builder(LI);
Value *Addr = LI->getPointerOperand();
- Type *PT = PointerType::get(NewTy,
- Addr->getType()->getPointerAddressSpace());
+ Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
Value *NewAddr = Builder.CreateBitCast(Addr, PT);
auto *NewLI = Builder.CreateLoad(NewTy, NewAddr);
@@ -385,7 +387,9 @@ AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
Value *Val = RMWI->getValOperand();
Type *PT = PointerType::get(NewTy, RMWI->getPointerAddressSpace());
Value *NewAddr = Builder.CreateBitCast(Addr, PT);
- Value *NewVal = Builder.CreateBitCast(Val, NewTy);
+ Value *NewVal = Val->getType()->isPointerTy()
+ ? Builder.CreatePtrToInt(Val, NewTy)
+ : Builder.CreateBitCast(Val, NewTy);
auto *NewRMWI =
Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, NewAddr, NewVal,
@@ -393,7 +397,9 @@ AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
NewRMWI->setVolatile(RMWI->isVolatile());
LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
- Value *NewRVal = Builder.CreateBitCast(NewRMWI, RMWI->getType());
+ Value *NewRVal = RMWI->getType()->isPointerTy()
+ ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
+ : Builder.CreateBitCast(NewRMWI, RMWI->getType());
RMWI->replaceAllUsesWith(NewRVal);
RMWI->eraseFromParent();
return NewRMWI;
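With the change above, convertAtomicXchgToIntegerType legalizes pointer exchanges as well as floating-point ones; since bitcast cannot convert between pointers and integers, the value round-trips through ptrtoint/inttoptr. An IR-level sketch (illustrative, assuming 64-bit pointers):

    // atomicrmw xchg i8** %p, i8* %v <order>   becomes roughly:
    //   %v.int   = ptrtoint i8* %v to i64
    //   %old.int = atomicrmw xchg i64* %p.cast, i64 %v.int <order>
    //   %old     = inttoptr i64 %old.int to i8*
    // Non-pointer (e.g. FP) values keep using plain bitcasts, as before.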
@@ -413,11 +419,29 @@ bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
return expandAtomicLoadToLL(LI);
case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
return expandAtomicLoadToCmpXchg(LI);
+ case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
+ LI->setAtomic(AtomicOrdering::NotAtomic);
+ return true;
default:
llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
}
}
+bool AtomicExpand::tryExpandAtomicStore(StoreInst *SI) {
+ switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
+ case TargetLoweringBase::AtomicExpansionKind::None:
+ return false;
+ case TargetLoweringBase::AtomicExpansionKind::Expand:
+ expandAtomicStore(SI);
+ return true;
+ case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
+ SI->setAtomic(AtomicOrdering::NotAtomic);
+ return true;
+ default:
+ llvm_unreachable("Unhandled case in tryExpandAtomicStore");
+ }
+}
+
bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
IRBuilder<> Builder(LI);
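The new NotAtomic expansion kind (also handled for RMW and cmpxchg later in this file) lets a target declare that an operation needs no hardware atomicity, in which case the pass simply strips the ordering in place. A hedged IR-level illustration of the load/store cases:

    // AtomicExpansionKind::NotAtomic: setAtomic(AtomicOrdering::NotAtomic)
    // demotes the access to a plain one, e.g.
    //   store atomic i32 %v, i32* %p monotonic, align 4
    // becomes
    //   store i32 %v, i32* %p, align 4
    // and likewise for loads; RMW and cmpxchg instead go through
    // lowerAtomicRMWInst / lowerAtomicCmpXchgInst from the newly
    // included LowerAtomic.h.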
@@ -471,8 +495,7 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
Value *Addr = SI->getPointerOperand();
- Type *PT = PointerType::get(NewTy,
- Addr->getType()->getPointerAddressSpace());
+ Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
Value *NewAddr = Builder.CreateBitCast(Addr, PT);
StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
@@ -484,7 +507,7 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
return NewSI;
}
-bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
+void AtomicExpand::expandAtomicStore(StoreInst *SI) {
// This function is only called on atomic stores that are too large to be
// atomic if implemented as a native store. So we replace them with an
// atomic swap, which can be implemented, for example, as ldrex/strex on ARM
@@ -498,7 +521,7 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
SI->eraseFromParent();
// Now we have an appropriate swap instruction, lower it as usual.
- return tryExpandAtomicRMW(AI);
+ tryExpandAtomicRMW(AI);
}
static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
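As the comment in expandAtomicStore notes, an oversized atomic store is rewritten into an atomic exchange whose result is unused, which tryExpandAtomicRMW then lowers like any other RMW (the function now returns void, since the tryExpandAtomicStore dispatcher above already reports the change). A hedged IR sketch of the rewrite:

    //   store atomic i128 %v, i128* %p seq_cst, align 16
    // becomes
    //   %unused = atomicrmw xchg i128* %p, i128 %v seq_cst
    // which a target may then expand to, e.g., an LL/SC loop.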
@@ -508,6 +531,7 @@ static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
Type *OrigTy = NewVal->getType();
// This code can go away when cmpxchg supports FP types.
+ assert(!OrigTy->isPointerTy());
bool NeedBitcast = OrigTy->isFloatingPointTy();
if (NeedBitcast) {
IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
@@ -527,47 +551,6 @@ static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
}
-/// Emit IR to implement the given atomicrmw operation on values in registers,
-/// returning the new value.
-static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
- Value *Loaded, Value *Inc) {
- Value *NewVal;
- switch (Op) {
- case AtomicRMWInst::Xchg:
- return Inc;
- case AtomicRMWInst::Add:
- return Builder.CreateAdd(Loaded, Inc, "new");
- case AtomicRMWInst::Sub:
- return Builder.CreateSub(Loaded, Inc, "new");
- case AtomicRMWInst::And:
- return Builder.CreateAnd(Loaded, Inc, "new");
- case AtomicRMWInst::Nand:
- return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
- case AtomicRMWInst::Or:
- return Builder.CreateOr(Loaded, Inc, "new");
- case AtomicRMWInst::Xor:
- return Builder.CreateXor(Loaded, Inc, "new");
- case AtomicRMWInst::Max:
- NewVal = Builder.CreateICmpSGT(Loaded, Inc);
- return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
- case AtomicRMWInst::Min:
- NewVal = Builder.CreateICmpSLE(Loaded, Inc);
- return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
- case AtomicRMWInst::UMax:
- NewVal = Builder.CreateICmpUGT(Loaded, Inc);
- return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
- case AtomicRMWInst::UMin:
- NewVal = Builder.CreateICmpULE(Loaded, Inc);
- return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
- case AtomicRMWInst::FAdd:
- return Builder.CreateFAdd(Loaded, Inc, "new");
- case AtomicRMWInst::FSub:
- return Builder.CreateFSub(Loaded, Inc, "new");
- default:
- llvm_unreachable("Unknown atomic op");
- }
-}
-
bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
LLVMContext &Ctx = AI->getModule()->getContext();
TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
@@ -582,8 +565,8 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
TargetLoweringBase::AtomicExpansionKind::LLSC);
} else {
auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) {
- return performAtomicOp(AI->getOperation(), Builder, Loaded,
- AI->getValOperand());
+ return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
+ AI->getValOperand());
};
expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
AI->getAlign(), AI->getOrdering(), PerformOp);
@@ -621,6 +604,12 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
expandAtomicRMWToMaskedIntrinsic(AI);
return true;
}
+ case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
+ TLI->emitBitTestAtomicRMWIntrinsic(AI);
+ return true;
+ }
+ case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
+ return lowerAtomicRMWInst(AI);
default:
llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
}
@@ -703,7 +692,7 @@ static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
PMV.AlignedAddr = Addr;
PMV.AlignedAddrAlignment = AddrAlign;
PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
- PMV.Mask = ConstantInt::get(PMV.ValueType, ~0);
+ PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
return PMV;
}
@@ -787,7 +776,7 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
case AtomicRMWInst::Sub:
case AtomicRMWInst::Nand: {
// The other arithmetic ops need to be masked into place.
- Value *NewVal = performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
+ Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
@@ -801,7 +790,7 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
// truncate down to the original size, and expand out again after
// doing the operation.
Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
- Value *NewVal = performAtomicOp(Op, Builder, Loaded_Extract, Inc);
+ Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
return FinalVal;
}
@@ -840,9 +829,8 @@ void AtomicExpand::expandPartwordAtomicRMW(
Value *OldResult;
if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
- PMV.AlignedAddrAlignment, MemOpOrder,
- SSID, PerformPartwordOp,
- createCmpXchgInstFun);
+ PMV.AlignedAddrAlignment, MemOpOrder, SSID,
+ PerformPartwordOp, createCmpXchgInstFun);
} else {
assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
@@ -1106,7 +1094,7 @@ Value *AtomicExpand::insertRMWLLSCLoop(
// [...]
BasicBlock *ExitBB =
BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
- BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
+ BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
// The split call above "helpfully" added a branch at the end of BB (to the
// wrong place).
@@ -1135,7 +1123,8 @@ Value *AtomicExpand::insertRMWLLSCLoop(
/// IR. As a migration step, we convert back to what used to be the standard
/// way to represent a pointer cmpxchg so that we can update backends one by
/// one.
-AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
+AtomicCmpXchgInst *
+AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
auto *M = CI->getModule();
Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
M->getDataLayout());
@@ -1143,8 +1132,7 @@ AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *
IRBuilder<> Builder(CI);
Value *Addr = CI->getPointerOperand();
- Type *PT = PointerType::get(NewTy,
- Addr->getType()->getPointerAddressSpace());
+ Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
Value *NewAddr = Builder.CreateBitCast(Addr, PT);
Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
@@ -1305,9 +1293,8 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
Value *NewValueInsert =
insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
- Value *StoreSuccess =
- TLI->emitStoreConditional(Builder, NewValueInsert, PMV.AlignedAddr,
- MemOpOrder);
+ Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
+ PMV.AlignedAddr, MemOpOrder);
StoreSuccess = Builder.CreateICmpEQ(
StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
@@ -1418,27 +1405,27 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
return true;
}
-bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) {
+bool AtomicExpand::isIdempotentRMW(AtomicRMWInst *RMWI) {
auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
- if(!C)
+ if (!C)
return false;
AtomicRMWInst::BinOp Op = RMWI->getOperation();
- switch(Op) {
- case AtomicRMWInst::Add:
- case AtomicRMWInst::Sub:
- case AtomicRMWInst::Or:
- case AtomicRMWInst::Xor:
- return C->isZero();
- case AtomicRMWInst::And:
- return C->isMinusOne();
- // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
- default:
- return false;
+ switch (Op) {
+ case AtomicRMWInst::Add:
+ case AtomicRMWInst::Sub:
+ case AtomicRMWInst::Or:
+ case AtomicRMWInst::Xor:
+ return C->isZero();
+ case AtomicRMWInst::And:
+ return C->isMinusOne();
+ // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
+ default:
+ return false;
}
}
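isIdempotentRMW (reindented above, with no functional change) recognizes read-modify-writes whose value operand makes the write a no-op, so simplifyIdempotentRMW can hand them to TLI->lowerIdempotentRMWIntoFencedLoad. Hedged IR examples of what it matches:

    //   atomicrmw or  i32* %p, i32  0 seq_cst   ; x | 0  == x
    //   atomicrmw add i32* %p, i32  0 seq_cst   ; x + 0  == x
    //   atomicrmw and i32* %p, i32 -1 seq_cst   ; x & ~0 == x
    // Each writes back exactly the value it read, so it can become an
    // atomic load plus whatever fences the target requires.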
-bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) {
+bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
tryExpandAtomicLoad(ResultingLoad);
return true;
@@ -1524,6 +1511,8 @@ bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
expandAtomicCmpXchgToMaskedIntrinsic(CI);
return true;
+ case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
+ return lowerAtomicCmpXchgInst(CI);
}
}
@@ -1535,8 +1524,8 @@ bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
AI->getOrdering(), AI->getSyncScopeID(),
[&](IRBuilder<> &Builder, Value *Loaded) {
- return performAtomicOp(AI->getOperation(), Builder, Loaded,
- AI->getValOperand());
+ return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
+ AI->getValOperand());
},
CreateCmpXchg);
@@ -1738,11 +1727,21 @@ bool AtomicExpand::expandAtomicOpToLibcall(
RTLIB::Libcall RTLibType;
if (UseSizedLibcall) {
switch (Size) {
- case 1: RTLibType = Libcalls[1]; break;
- case 2: RTLibType = Libcalls[2]; break;
- case 4: RTLibType = Libcalls[3]; break;
- case 8: RTLibType = Libcalls[4]; break;
- case 16: RTLibType = Libcalls[5]; break;
+ case 1:
+ RTLibType = Libcalls[1];
+ break;
+ case 2:
+ RTLibType = Libcalls[2];
+ break;
+ case 4:
+ RTLibType = Libcalls[3];
+ break;
+ case 8:
+ RTLibType = Libcalls[4];
+ break;
+ case 16:
+ RTLibType = Libcalls[5];
+ break;
}
} else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
RTLibType = Libcalls[0];
@@ -1806,8 +1805,8 @@ bool AtomicExpand::expandAtomicOpToLibcall(
// that property, we'd need to extend this mechanism to support AS-specific
// families of atomic intrinsics.
auto PtrTypeAS = PointerOperand->getType()->getPointerAddressSpace();
- Value *PtrVal = Builder.CreateBitCast(PointerOperand,
- Type::getInt8PtrTy(Ctx, PtrTypeAS));
+ Value *PtrVal =
+ Builder.CreateBitCast(PointerOperand, Type::getInt8PtrTy(Ctx, PtrTypeAS));
PtrVal = Builder.CreateAddrSpaceCast(PtrVal, Type::getInt8PtrTy(Ctx));
Args.push_back(PtrVal);
@@ -1815,11 +1814,10 @@ bool AtomicExpand::expandAtomicOpToLibcall(
if (CASExpected) {
AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
AllocaCASExpected->setAlignment(AllocaAlignment);
- unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace();
+ unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace();
- AllocaCASExpected_i8 =
- Builder.CreateBitCast(AllocaCASExpected,
- Type::getInt8PtrTy(Ctx, AllocaAS));
+ AllocaCASExpected_i8 = Builder.CreateBitCast(
+ AllocaCASExpected, Type::getInt8PtrTy(Ctx, AllocaAS));
Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64);
Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
Args.push_back(AllocaCASExpected_i8);
@@ -1846,9 +1844,9 @@ bool AtomicExpand::expandAtomicOpToLibcall(
if (!CASExpected && HasResult && !UseSizedLibcall) {
AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
AllocaResult->setAlignment(AllocaAlignment);
- unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace();
+ unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace();
AllocaResult_i8 =
- Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS));
+ Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS));
Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64);
Args.push_back(AllocaResult_i8);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp
index c1901bc46d72..f05f5b9f9947 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -60,7 +60,7 @@
// Basic Block Labels
// ==================
//
-// With -fbasic-block-sections=labels, we emit the offsets of BB addresses of
+// With -fbasic-block-sections=labels, we encode the offsets of BB addresses of
// every function into the .llvm_bb_addr_map section. Along with the function
// symbols, this allows for mapping of virtual addresses in PMU profiles back to
// the corresponding basic blocks. This logic is implemented in AsmPrinter. This
@@ -69,26 +69,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
#include "llvm/CodeGen/BasicBlockSectionUtils.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Support/Error.h"
-#include "llvm/Support/LineIterator.h"
-#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Target/TargetMachine.h"
-using llvm::SmallSet;
-using llvm::SmallVector;
-using llvm::StringMap;
-using llvm::StringRef;
using namespace llvm;
// Placing the cold clusters in a separate section mitigates against poor
@@ -108,41 +99,11 @@ cl::opt<bool> BBSectionsDetectSourceDrift(
namespace {
-// This struct represents the cluster information for a machine basic block.
-struct BBClusterInfo {
- // MachineBasicBlock ID.
- unsigned MBBNumber;
- // Cluster ID this basic block belongs to.
- unsigned ClusterID;
- // Position of basic block within the cluster.
- unsigned PositionInCluster;
-};
-
-using ProgramBBClusterInfoMapTy = StringMap<SmallVector<BBClusterInfo, 4>>;
-
class BasicBlockSections : public MachineFunctionPass {
public:
static char ID;
- // This contains the basic-block-sections profile.
- const MemoryBuffer *MBuf = nullptr;
-
- // This encapsulates the BB cluster information for the whole program.
- //
- // For every function name, it contains the cluster information for (all or
- // some of) its basic blocks. The cluster information for every basic block
- // includes its cluster ID along with the position of the basic block in that
- // cluster.
- ProgramBBClusterInfoMapTy ProgramBBClusterInfo;
-
- // Some functions have alias names. We use this map to find the main alias
- // name for which we have mapping in ProgramBBClusterInfo.
- StringMap<StringRef> FuncAliasMap;
-
- BasicBlockSections(const MemoryBuffer *Buf)
- : MachineFunctionPass(ID), MBuf(Buf) {
- initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry());
- };
+ BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
BasicBlockSections() : MachineFunctionPass(ID) {
initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry());
@@ -154,9 +115,6 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override;
- /// Read profiles of basic blocks if available here.
- bool doInitialization(Module &M) override;
-
/// Identify basic blocks that need separate sections and prepare to emit them
/// accordingly.
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -206,21 +164,18 @@ static void updateBranches(
// This function provides the BBCluster information associated with a function.
// Returns true if a valid association exists and false otherwise.
-static bool getBBClusterInfoForFunction(
- const MachineFunction &MF, const StringMap<StringRef> FuncAliasMap,
- const ProgramBBClusterInfoMapTy &ProgramBBClusterInfo,
+bool getBBClusterInfoForFunction(
+ const MachineFunction &MF,
+ BasicBlockSectionsProfileReader *BBSectionsProfileReader,
std::vector<Optional<BBClusterInfo>> &V) {
- // Get the main alias name for the function.
- auto FuncName = MF.getName();
- auto R = FuncAliasMap.find(FuncName);
- StringRef AliasName = R == FuncAliasMap.end() ? FuncName : R->second;
// Find the associated cluster information.
- auto P = ProgramBBClusterInfo.find(AliasName);
- if (P == ProgramBBClusterInfo.end())
+ std::pair<bool, SmallVector<BBClusterInfo, 4>> P =
+ BBSectionsProfileReader->getBBClusterInfoForFunction(MF.getName());
+ if (!P.first)
return false;
- if (P->second.empty()) {
+ if (P.second.empty()) {
// This indicates that sections are desired for all basic blocks of this
// function. We clear the BBClusterInfo vector to denote this.
V.clear();
@@ -228,7 +183,7 @@ static bool getBBClusterInfoForFunction(
}
V.resize(MF.getNumBlockIDs());
- for (auto bbClusterInfo : P->second) {
+ for (auto bbClusterInfo : P.second) {
// Bail out if the cluster information contains invalid MBB numbers.
if (bbClusterInfo.MBBNumber >= MF.getNumBlockIDs())
return false;
@@ -266,7 +221,7 @@ assignSections(MachineFunction &MF,
// set every basic block's section ID equal to its number (basic block
// id). This further ensures that basic blocks are ordered canonically.
MBB.setSectionID({static_cast<unsigned int>(MBB.getNumber())});
- } else if (FuncBBClusterInfo[MBB.getNumber()].hasValue())
+ } else if (FuncBBClusterInfo[MBB.getNumber()])
MBB.setSectionID(FuncBBClusterInfo[MBB.getNumber()]->ClusterID);
else {
// BB goes into the special cold section if it is not specified in the
@@ -279,9 +234,8 @@ assignSections(MachineFunction &MF,
// If we already have one cluster containing eh_pads, this must be updated
// to ExceptionSectionID. Otherwise, we set it equal to the current
// section ID.
- EHPadsSectionID = EHPadsSectionID.hasValue()
- ? MBBSectionID::ExceptionSectionID
- : MBB.getSectionID();
+ EHPadsSectionID = EHPadsSectionID ? MBBSectionID::ExceptionSectionID
+ : MBB.getSectionID();
}
}
@@ -290,7 +244,7 @@ assignSections(MachineFunction &MF,
if (EHPadsSectionID == MBBSectionID::ExceptionSectionID)
for (auto &MBB : MF)
if (MBB.isEHPad())
- MBB.setSectionID(EHPadsSectionID.getValue());
+ MBB.setSectionID(*EHPadsSectionID);
}
void llvm::sortBasicBlocksAndUpdateBranches(
@@ -377,9 +331,11 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
return true;
}
+ BBSectionsProfileReader = &getAnalysis<BasicBlockSectionsProfileReader>();
+
std::vector<Optional<BBClusterInfo>> FuncBBClusterInfo;
if (BBSectionsType == BasicBlockSection::List &&
- !getBBClusterInfoForFunction(MF, FuncAliasMap, ProgramBBClusterInfo,
+ !getBBClusterInfoForFunction(MF, BBSectionsProfileReader,
FuncBBClusterInfo))
return true;
MF.setBBSectionsType(BBSectionsType);
@@ -427,107 +383,12 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
return true;
}
-// Basic Block Sections can be enabled for a subset of machine basic blocks.
-// This is done by passing a file containing names of functions for which basic
-// block sections are desired. Additionally, machine basic block ids of the
-// functions can also be specified for a finer granularity. Moreover, a cluster
-// of basic blocks could be assigned to the same section.
-// A file with basic block sections for all of function main and three blocks
-// for function foo (of which 1 and 2 are placed in a cluster) looks like this:
-// ----------------------------
-// list.txt:
-// !main
-// !foo
-// !!1 2
-// !!4
-static Error getBBClusterInfo(const MemoryBuffer *MBuf,
- ProgramBBClusterInfoMapTy &ProgramBBClusterInfo,
- StringMap<StringRef> &FuncAliasMap) {
- assert(MBuf);
- line_iterator LineIt(*MBuf, /*SkipBlanks=*/true, /*CommentMarker=*/'#');
-
- auto invalidProfileError = [&](auto Message) {
- return make_error<StringError>(
- Twine("Invalid profile " + MBuf->getBufferIdentifier() + " at line " +
- Twine(LineIt.line_number()) + ": " + Message),
- inconvertibleErrorCode());
- };
-
- auto FI = ProgramBBClusterInfo.end();
-
- // Current cluster ID corresponding to this function.
- unsigned CurrentCluster = 0;
- // Current position in the current cluster.
- unsigned CurrentPosition = 0;
-
- // Temporary set to ensure every basic block ID appears once in the clusters
- // of a function.
- SmallSet<unsigned, 4> FuncBBIDs;
-
- for (; !LineIt.is_at_eof(); ++LineIt) {
- StringRef S(*LineIt);
- if (S[0] == '@')
- continue;
- // Check for the leading "!"
- if (!S.consume_front("!") || S.empty())
- break;
- // Check for second "!" which indicates a cluster of basic blocks.
- if (S.consume_front("!")) {
- if (FI == ProgramBBClusterInfo.end())
- return invalidProfileError(
- "Cluster list does not follow a function name specifier.");
- SmallVector<StringRef, 4> BBIndexes;
- S.split(BBIndexes, ' ');
- // Reset current cluster position.
- CurrentPosition = 0;
- for (auto BBIndexStr : BBIndexes) {
- unsigned long long BBIndex;
- if (getAsUnsignedInteger(BBIndexStr, 10, BBIndex))
- return invalidProfileError(Twine("Unsigned integer expected: '") +
- BBIndexStr + "'.");
- if (!FuncBBIDs.insert(BBIndex).second)
- return invalidProfileError(Twine("Duplicate basic block id found '") +
- BBIndexStr + "'.");
- if (!BBIndex && CurrentPosition)
- return invalidProfileError("Entry BB (0) does not begin a cluster.");
-
- FI->second.emplace_back(BBClusterInfo{
- ((unsigned)BBIndex), CurrentCluster, CurrentPosition++});
- }
- CurrentCluster++;
- } else { // This is a function name specifier.
- // Function aliases are separated using '/'. We use the first function
- // name for the cluster info mapping and delegate all other aliases to
- // this one.
- SmallVector<StringRef, 4> Aliases;
- S.split(Aliases, '/');
- for (size_t i = 1; i < Aliases.size(); ++i)
- FuncAliasMap.try_emplace(Aliases[i], Aliases.front());
-
- // Prepare for parsing clusters of this function name.
- // Start a new cluster map for this function name.
- FI = ProgramBBClusterInfo.try_emplace(Aliases.front()).first;
- CurrentCluster = 0;
- FuncBBIDs.clear();
- }
- }
- return Error::success();
-}
-
-bool BasicBlockSections::doInitialization(Module &M) {
- if (!MBuf)
- return false;
- if (auto Err = getBBClusterInfo(MBuf, ProgramBBClusterInfo, FuncAliasMap))
- report_fatal_error(std::move(Err));
- return false;
-}
-
void BasicBlockSections::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
+ AU.addRequired<BasicBlockSectionsProfileReader>();
MachineFunctionPass::getAnalysisUsage(AU);
}
-MachineFunctionPass *
-llvm::createBasicBlockSectionsPass(const MemoryBuffer *Buf) {
- return new BasicBlockSections(Buf);
+MachineFunctionPass *llvm::createBasicBlockSectionsPass() {
+ return new BasicBlockSections();
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
new file mode 100644
index 000000000000..c2acf115998b
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -0,0 +1,144 @@
+//===-- BasicBlockSectionsProfileReader.cpp -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the basic block sections profile reader pass. It parses
+// and stores the basic block sections profile file (which is specified via the
+// `-basic-block-sections` flag).
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using namespace llvm;
+
+char BasicBlockSectionsProfileReader::ID = 0;
+INITIALIZE_PASS(BasicBlockSectionsProfileReader, "bbsections-profile-reader",
+ "Reads and parses a basic block sections profile.", false,
+ false)
+
+bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const {
+ return getBBClusterInfoForFunction(FuncName).first;
+}
+
+std::pair<bool, SmallVector<BBClusterInfo>>
+BasicBlockSectionsProfileReader::getBBClusterInfoForFunction(
+ StringRef FuncName) const {
+ std::pair<bool, SmallVector<BBClusterInfo>> cluster_info(false, {});
+ auto R = ProgramBBClusterInfo.find(getAliasName(FuncName));
+ if (R != ProgramBBClusterInfo.end()) {
+ cluster_info.second = R->second;
+ cluster_info.first = true;
+ }
+ return cluster_info;
+}
+
+// Basic Block Sections can be enabled for a subset of machine basic blocks.
+// This is done by passing a file containing names of functions for which basic
+// block sections are desired. Additionally, machine basic block ids of the
+// functions can also be specified for a finer granularity. Moreover, a cluster
+// of basic blocks could be assigned to the same section.
+// A file with basic block sections for all of function main and three blocks
+// for function foo (of which 1 and 2 are placed in a cluster) looks like this:
+// ----------------------------
+// list.txt:
+// !main
+// !foo
+// !!1 2
+// !!4
+static Error getBBClusterInfo(const MemoryBuffer *MBuf,
+ ProgramBBClusterInfoMapTy &ProgramBBClusterInfo,
+ StringMap<StringRef> &FuncAliasMap) {
+ assert(MBuf);
+ line_iterator LineIt(*MBuf, /*SkipBlanks=*/true, /*CommentMarker=*/'#');
+
+ auto invalidProfileError = [&](auto Message) {
+ return make_error<StringError>(
+ Twine("Invalid profile " + MBuf->getBufferIdentifier() + " at line " +
+ Twine(LineIt.line_number()) + ": " + Message),
+ inconvertibleErrorCode());
+ };
+
+ auto FI = ProgramBBClusterInfo.end();
+
+ // Current cluster ID corresponding to this function.
+ unsigned CurrentCluster = 0;
+ // Current position in the current cluster.
+ unsigned CurrentPosition = 0;
+
+ // Temporary set to ensure every basic block ID appears once in the clusters
+ // of a function.
+ SmallSet<unsigned, 4> FuncBBIDs;
+
+ for (; !LineIt.is_at_eof(); ++LineIt) {
+ StringRef S(*LineIt);
+ if (S[0] == '@')
+ continue;
+ // Check for the leading "!"
+ if (!S.consume_front("!") || S.empty())
+ break;
+ // Check for second "!" which indicates a cluster of basic blocks.
+ if (S.consume_front("!")) {
+ if (FI == ProgramBBClusterInfo.end())
+ return invalidProfileError(
+ "Cluster list does not follow a function name specifier.");
+ SmallVector<StringRef, 4> BBIndexes;
+ S.split(BBIndexes, ' ');
+ // Reset current cluster position.
+ CurrentPosition = 0;
+ for (auto BBIndexStr : BBIndexes) {
+ unsigned long long BBIndex;
+ if (getAsUnsignedInteger(BBIndexStr, 10, BBIndex))
+ return invalidProfileError(Twine("Unsigned integer expected: '") +
+ BBIndexStr + "'.");
+ if (!FuncBBIDs.insert(BBIndex).second)
+ return invalidProfileError(Twine("Duplicate basic block id found '") +
+ BBIndexStr + "'.");
+ if (!BBIndex && CurrentPosition)
+ return invalidProfileError("Entry BB (0) does not begin a cluster.");
+
+ FI->second.emplace_back(BBClusterInfo{
+ ((unsigned)BBIndex), CurrentCluster, CurrentPosition++});
+ }
+ CurrentCluster++;
+ } else { // This is a function name specifier.
+ // Function aliases are separated using '/'. We use the first function
+ // name for the cluster info mapping and delegate all other aliases to
+ // this one.
+ SmallVector<StringRef, 4> Aliases;
+ S.split(Aliases, '/');
+ for (size_t i = 1; i < Aliases.size(); ++i)
+ FuncAliasMap.try_emplace(Aliases[i], Aliases.front());
+
+ // Prepare for parsing clusters of this function name.
+ // Start a new cluster map for this function name.
+ FI = ProgramBBClusterInfo.try_emplace(Aliases.front()).first;
+ CurrentCluster = 0;
+ FuncBBIDs.clear();
+ }
+ }
+ return Error::success();
+}
+
+void BasicBlockSectionsProfileReader::initializePass() {
+ if (!MBuf)
+ return;
+ if (auto Err = getBBClusterInfo(MBuf, ProgramBBClusterInfo, FuncAliasMap))
+ report_fatal_error(std::move(Err));
+}
+
+ImmutablePass *
+llvm::createBasicBlockSectionsProfileReaderPass(const MemoryBuffer *Buf) {
+ return new BasicBlockSectionsProfileReader(Buf);
+}
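Taken together with the BasicBlockSections.cpp changes above, the intended wiring is: the profile file is parsed once by this immutable pass, and each machine function queries it by (alias-resolved) name. A hedged usage sketch using only names from this diff; the actual pass registration lives in TargetPassConfig, which is not shown here:

    // Registration (hypothetical call site); Buf holds the contents of the
    // file passed via -basic-block-sections.
    addPass(createBasicBlockSectionsProfileReaderPass(Buf));
    addPass(createBasicBlockSectionsPass()); // no longer owns the buffer

    // Per-function query, as in BasicBlockSections::runOnMachineFunction:
    auto *Reader = &getAnalysis<BasicBlockSectionsProfileReader>();
    std::pair<bool, SmallVector<BBClusterInfo>> P =
        Reader->getBBClusterInfoForFunction(MF.getName());
    if (P.first)
      ; // P.second lists {MBBNumber, ClusterID, PositionInCluster} per block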
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
index 0ff67f7ca00a..07be03d2dab9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MBFIWrapper.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -32,11 +33,9 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
-#include "llvm/CodeGen/MBFIWrapper.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -105,6 +104,11 @@ namespace {
AU.addRequired<TargetPassConfig>();
MachineFunctionPass::getAnalysisUsage(AU);
}
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoPHIs);
+ }
};
} // end anonymous namespace
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h
index 95d5dcfbbd0f..d0b6ed5ebe05 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h
@@ -14,7 +14,6 @@
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Support/Compiler.h"
-#include <cstdint>
#include <vector>
namespace llvm {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp
index eda0f37fdeb7..29508f8f35a6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -24,7 +24,6 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp
index 558700bd9b3b..57170c58db14 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp
@@ -19,11 +19,13 @@
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ReachingDefAnalysis.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegister.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CFIFixup.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CFIFixup.cpp
new file mode 100644
index 000000000000..837dbd77d073
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CFIFixup.cpp
@@ -0,0 +1,225 @@
+//===------ CFIFixup.cpp - Insert CFI remember/restore instructions -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+
+// This pass inserts the necessary instructions to adjust for the inconsistency
+// of the call-frame information caused by final machine basic block layout.
+// The pass relies on constraints LLVM imposes on the placement of
+// save/restore points (cf. ShrinkWrap):
+// * there is a single basic block, containing the function prologue
+// * possibly multiple epilogue blocks, where each epilogue block is
+// complete and self-contained, i.e. CSR restore instructions (and the
+// corresponding CFI instructions) are not split across two or more blocks.
+// * prologue and epilogue blocks are outside of any loops
+// Thus, during execution, at the beginning and at the end of each basic block
+// the function can be in one of two states:
+// - "has a call frame", if the function has executed the prologue, and
+// has not executed any epilogue
+// - "does not have a call frame", if the function has not executed the
+// prologue, or has executed an epilogue
+// which can be computed by a single RPO traversal.
+
+// In order to accommodate backends which do not generate unwind info in
+// epilogues, we compute an additional property "strong no call frame on entry",
+// which is set for the entry point of the function and for every block
+// reachable from the entry along a path that does not execute the prologue. If
+// this property holds, it takes precedence over the "has a call frame"
+// property.
+
+// From the point of view of the unwind tables, the "has/does not have call
+// frame" state at beginning of each block is determined by the state at the end
+// of the previous block, in layout order. Where these states differ, we insert
+// compensating CFI instructions, which come in two flavours:
+
+// - CFI instructions, which reset the unwind table state to the initial one.
+// This is done by a target specific hook and is expected to be trivial
+// to implement, for example it could be:
+// .cfi_def_cfa <sp>, 0
+// .cfi_same_value <rN>
+// .cfi_same_value <rN-1>
+// ...
+// where <rN> are the callee-saved registers.
+// - CFI instructions, which reset the unwind table state to the one
+// created by the function prologue. This is
+// .cfi_restore_state
+// In this case we also insert a `.cfi_remember_state` after the last CFI
+// instruction in the function prologue.
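+//
+// As an illustration (a sketch of the intent, not verbatim pass output),
+// suppose the final layout places an epilogue block between the prologue
+// block and a block that still needs the frame:
+//
+// bb.0 (prologue):
+// <prologue CFI instructions>
+// .cfi_remember_state <-- inserted by this pass
+// bb.1 (epilogue):
+// ...
+// bb.2 (needs a frame on entry):
+// .cfi_restore_state <-- inserted by this pass
+// ...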
+//
+// Known limitations:
+// * the pass cannot handle an epilogue preceding the prologue in the basic
+// block layout
+// * the pass does not handle functions where SP is used as a frame pointer and
+// SP adjustments up and down are done in different basic blocks (TODO)
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/CFIFixup.h"
+
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "cfi-fixup"
+
+char CFIFixup::ID = 0;
+
+INITIALIZE_PASS(CFIFixup, "cfi-fixup",
+ "Insert CFI remember/restore state instructions", false, false)
+FunctionPass *llvm::createCFIFixup() { return new CFIFixup(); }
+
+static bool isPrologueCFIInstruction(const MachineInstr &MI) {
+ return MI.getOpcode() == TargetOpcode::CFI_INSTRUCTION &&
+ MI.getFlag(MachineInstr::FrameSetup);
+}
+
+static bool containsPrologue(const MachineBasicBlock &MBB) {
+ return llvm::any_of(MBB.instrs(), isPrologueCFIInstruction);
+}
+
+static bool containsEpilogue(const MachineBasicBlock &MBB) {
+ return llvm::any_of(llvm::reverse(MBB), [](const auto &MI) {
+ return MI.getOpcode() == TargetOpcode::CFI_INSTRUCTION &&
+ MI.getFlag(MachineInstr::FrameDestroy);
+ });
+}
+
+bool CFIFixup::runOnMachineFunction(MachineFunction &MF) {
+ const TargetFrameLowering &TFL = *MF.getSubtarget().getFrameLowering();
+ if (!TFL.enableCFIFixup(MF))
+ return false;
+
+ const unsigned NumBlocks = MF.getNumBlockIDs();
+ if (NumBlocks < 2)
+ return false;
+
+ struct BlockFlags {
+ bool Reachable : 1; // Block is reachable from the entry.
+ bool StrongNoFrameOnEntry : 1; // Reachable along some prologue-free path.
+ bool HasFrameOnEntry : 1; // A call frame is set up on block entry.
+ bool HasFrameOnExit : 1; // A call frame is set up on block exit.
+ };
+ SmallVector<BlockFlags, 32> BlockInfo(NumBlocks, {false, false, false, false});
+ BlockInfo[0].Reachable = true;
+ BlockInfo[0].StrongNoFrameOnEntry = true;
+
+ // Compute the presence/absence of frame at each basic block.
+ MachineBasicBlock *PrologueBlock = nullptr;
+ ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
+ for (MachineBasicBlock *MBB : RPOT) {
+ BlockFlags &Info = BlockInfo[MBB->getNumber()];
+
+ // Set to true if the current block contains the prologue or the epilogue,
+ // respectively.
+ bool HasPrologue = false;
+ bool HasEpilogue = false;
+
+ if (!PrologueBlock && !Info.HasFrameOnEntry && containsPrologue(*MBB)) {
+ PrologueBlock = MBB;
+ HasPrologue = true;
+ }
+
+ if (Info.HasFrameOnEntry || HasPrologue)
+ HasEpilogue = containsEpilogue(*MBB);
+
+ // If the function has a call frame at the entry of the current block or the
+ // current block contains the prologue, then the function has a call frame
+ // at the exit of the block, unless the block contains the epilogue.
+ Info.HasFrameOnExit = (Info.HasFrameOnEntry || HasPrologue) && !HasEpilogue;
+
+ // Set the successors' state on entry.
+ for (MachineBasicBlock *Succ : MBB->successors()) {
+ BlockFlags &SuccInfo = BlockInfo[Succ->getNumber()];
+ SuccInfo.Reachable = true;
+ SuccInfo.StrongNoFrameOnEntry |=
+ Info.StrongNoFrameOnEntry && !HasPrologue;
+ SuccInfo.HasFrameOnEntry = Info.HasFrameOnExit;
+ }
+ }
+
+ if (!PrologueBlock)
+ return false;
+
+ // Walk the blocks of the function in "physical" order.
+ // Every block inherits the frame state (as recorded in the unwind tables)
+ // of the previous block. If the intended frame state is different, insert
+ // compensating CFI instructions.
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ bool Change = false;
+ // `InsertPt` always points to the point in a preceding block where we have to
+ // insert a `.cfi_remember_state`, in the case that the current block needs a
+ // `.cfi_restore_state`.
+ MachineBasicBlock *InsertMBB = PrologueBlock;
+ MachineBasicBlock::iterator InsertPt = PrologueBlock->begin();
+ for (MachineInstr &MI : *PrologueBlock)
+ if (isPrologueCFIInstruction(MI))
+ InsertPt = std::next(MI.getIterator());
+
+ assert(InsertPt != PrologueBlock->begin() &&
+ "Inconsistent notion of \"prologue block\"");
+
+ // No point starting before the prologue block.
+ // TODO: the unwind tables will still be incorrect if an epilogue physically
+ // precedes the prologue.
+ MachineFunction::iterator CurrBB = std::next(PrologueBlock->getIterator());
+ bool HasFrame = BlockInfo[PrologueBlock->getNumber()].HasFrameOnExit;
+ while (CurrBB != MF.end()) {
+ const BlockFlags &Info = BlockInfo[CurrBB->getNumber()];
+ if (!Info.Reachable) {
+ ++CurrBB;
+ continue;
+ }
+
+#ifndef NDEBUG
+ if (!Info.StrongNoFrameOnEntry) {
+ for (auto *Pred : CurrBB->predecessors()) {
+ BlockFlags &PredInfo = BlockInfo[Pred->getNumber()];
+ assert((!PredInfo.Reachable ||
+ Info.HasFrameOnEntry == PredInfo.HasFrameOnExit) &&
+ "Inconsistent call frame state");
+ }
+ }
+#endif
+ if (!Info.StrongNoFrameOnEntry && Info.HasFrameOnEntry && !HasFrame) {
+ // Reset to the "after prologue" state.
+
+ // Insert a `.cfi_remember_state` into the last block known to have a
+ // stack frame.
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr));
+ BuildMI(*InsertMBB, InsertPt, DebugLoc(),
+ TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ // Insert a `.cfi_restore_state` at the beginning of the current block.
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestoreState(nullptr));
+ InsertPt = BuildMI(*CurrBB, CurrBB->begin(), DebugLoc(),
+ TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ ++InsertPt;
+ InsertMBB = &*CurrBB;
+ Change = true;
+ } else if ((Info.StrongNoFrameOnEntry || !Info.HasFrameOnEntry) &&
+ HasFrame) {
+ // Reset to the state upon function entry.
+ TFL.resetCFIToInitialState(*CurrBB);
+ Change = true;
+ }
+
+ HasFrame = Info.HasFrameOnExit;
+ ++CurrBB;
+ }
+
+ return Change;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp
index de173a9dfd62..42523c47a671 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp
@@ -19,16 +19,14 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/SetOperations.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/MC/MCDwarf.h"
using namespace llvm;
static cl::opt<bool> VerifyCFI("verify-cfiinstrs",
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp
index 84a0e4142bb6..689e49978d43 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -145,11 +145,6 @@ void VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &LI) {
LI.setWeight(Weight);
}
-float VirtRegAuxInfo::futureWeight(LiveInterval &LI, SlotIndex Start,
- SlotIndex End) {
- return weightCalcHelper(LI, &Start, &End);
-}
-
float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
SlotIndex *End) {
MachineRegisterInfo &MRI = MF.getRegInfo();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp
index c9246f6e8754..f74ff30ab2e1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -14,16 +14,14 @@
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
using namespace llvm;
@@ -72,15 +70,9 @@ bool CCState::IsShadowAllocatedReg(MCRegister Reg) const {
if (!isAllocated(Reg))
return false;
- for (auto const &ValAssign : Locs) {
- if (ValAssign.isRegLoc()) {
- for (MCRegAliasIterator AI(ValAssign.getLocReg(), &TRI, true);
- AI.isValid(); ++AI) {
- if (*AI == Reg)
- return false;
- }
- }
- }
+ for (auto const &ValAssign : Locs)
+ if (ValAssign.isRegLoc() && TRI.regsOverlap(ValAssign.getLocReg(), Reg))
+ return false;
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
index 7c236a9785d8..5050395fbc0f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
@@ -24,6 +24,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeBranchFolderPassPass(Registry);
initializeBranchRelaxationPass(Registry);
initializeCFGuardLongjmpPass(Registry);
+ initializeCFIFixupPass(Registry);
initializeCFIInstrInserterPass(Registry);
initializeCheckDebugMachineModulePass(Registry);
initializeCodeGenPreparePass(Registry);
@@ -50,6 +51,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeIndirectBrExpandPassPass(Registry);
initializeInterleavedLoadCombinePass(Registry);
initializeInterleavedAccessPass(Registry);
+ initializeJMCInstrumenterPass(Registry);
initializeLiveDebugValuesPass(Registry);
initializeLiveDebugVariablesPass(Registry);
initializeLiveIntervalsPass(Registry);
@@ -57,6 +59,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeLiveStacksPass(Registry);
initializeLiveVariablesPass(Registry);
initializeLocalStackSlotPassPass(Registry);
+ initializeLowerGlobalDtorsLegacyPassPass(Registry);
initializeLowerIntrinsicsPass(Registry);
initializeMIRAddFSDiscriminatorsPass(Registry);
initializeMIRCanonicalizerPass(Registry);
@@ -104,6 +107,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeRemoveRedundantDebugValuesPass(Registry);
initializeRenameIndependentSubregsPass(Registry);
initializeSafeStackLegacyPassPass(Registry);
+ initializeSelectOptimizePass(Registry);
initializeShadowStackGCLoweringPass(Registry);
initializeShrinkWrapPass(Registry);
initializeSjLjEHPreparePass(Registry);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp
index 877aa69c3e58..8f185a161bd0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp
@@ -129,7 +129,9 @@ llvm::findSplitPointForStackProtector(MachineBasicBlock *BB,
MachineBasicBlock::iterator Start = BB->begin();
MachineBasicBlock::iterator Previous = SplitPoint;
- --Previous;
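+ // Step back over any debug instructions so that their presence does not
+ // change where the block is split.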
+ do {
+ --Previous;
+ } while (Previous != Start && Previous->isDebugInstr());
if (TII.isTailCall(*SplitPoint) &&
Previous->getOpcode() == TII.getCallFrameDestroyOpcode()) {
@@ -142,7 +144,7 @@ llvm::findSplitPointForStackProtector(MachineBasicBlock *BB,
// ADJCALLSTACKUP ...
// TAILJMP somewhere
// On the other hand, it could be an unrelated call in which case this tail
- // call has to register moves of its own and should be the split point. For
+ // call has no register moves of its own and should be the split point. For
// example:
// ADJCALLSTACKDOWN
// CALL something_else
@@ -167,3 +169,31 @@ llvm::findSplitPointForStackProtector(MachineBasicBlock *BB,
return SplitPoint;
}
+
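+// Return the inversion of the FP class test \p Test when the inverted mask
+// is exactly one of the named class sets handled below (so the inverted
+// condition can be tested directly); return 0 otherwise.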
+unsigned llvm::getInvertedFPClassTest(unsigned Test) {
+ unsigned InvertedTest = ~Test & fcAllFlags;
+ switch (InvertedTest) {
+ default:
+ break;
+ case fcNan:
+ case fcSNan:
+ case fcQNan:
+ case fcInf:
+ case fcPosInf:
+ case fcNegInf:
+ case fcNormal:
+ case fcPosNormal:
+ case fcNegNormal:
+ case fcSubnormal:
+ case fcPosSubnormal:
+ case fcNegSubnormal:
+ case fcZero:
+ case fcPosZero:
+ case fcNegZero:
+ case fcFinite:
+ case fcPosFinite:
+ case fcNegFinite:
+ return InvertedTest;
+ }
+ return 0;
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
index c888adeafca5..6778af22f532 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -23,16 +23,15 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -174,12 +173,11 @@ static cl::opt<bool> DisablePreheaderProtect(
cl::desc("Disable protection against removing loop preheaders"));
static cl::opt<bool> ProfileGuidedSectionPrefix(
- "profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::ZeroOrMore,
+ "profile-guided-section-prefix", cl::Hidden, cl::init(true),
cl::desc("Use profile info to add section prefix for hot/cold functions"));
static cl::opt<bool> ProfileUnknownInSpecialSection(
- "profile-unknown-in-special-section", cl::Hidden, cl::init(false),
- cl::ZeroOrMore,
+ "profile-unknown-in-special-section", cl::Hidden,
cl::desc("In profiling mode like sampleFDO, if a function doesn't have "
"profile, we cannot tell the function is cold for sure because "
"it may be a function newly added without ever being sampled. "
@@ -188,6 +186,15 @@ static cl::opt<bool> ProfileUnknownInSpecialSection(
"to handle it in a different way than .text section, to save "
"RAM for example. "));
+static cl::opt<bool> BBSectionsGuidedSectionPrefix(
+ "bbsections-guided-section-prefix", cl::Hidden, cl::init(true),
+ cl::desc("Use the basic-block-sections profile to determine the text "
+ "section prefix for hot functions. Functions with "
+ "basic-block-sections profile will be placed in `.text.hot` "
+ "regardless of their FDO profile info. Other functions won't be "
+ "impacted, i.e., their prefixes will be decided by FDO/sampleFDO "
+ "profiles."));
+
static cl::opt<unsigned> FreqRatioToSkipMerge(
"cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
cl::desc("Skip merging empty blocks if (frequency of empty block) / "
@@ -274,6 +281,7 @@ class TypePromotionTransaction;
const TargetLowering *TLI = nullptr;
const TargetRegisterInfo *TRI;
const TargetTransformInfo *TTI = nullptr;
+ const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
const TargetLibraryInfo *TLInfo;
const LoopInfo *LI;
std::unique_ptr<BlockFrequencyInfo> BFI;
@@ -349,6 +357,7 @@ class TypePromotionTransaction;
AU.addRequired<TargetPassConfig>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
+ AU.addUsedIfAvailable<BasicBlockSectionsProfileReader>();
}
private:
@@ -401,6 +410,8 @@ class TypePromotionTransaction;
bool optimizeFunnelShift(IntrinsicInst *Fsh);
bool optimizeSelectInst(SelectInst *SI);
bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
+ bool optimizeSwitchType(SwitchInst *SI);
+ bool optimizeSwitchPhiConstants(SwitchInst *SI);
bool optimizeSwitchInst(SwitchInst *SI);
bool optimizeExtractElementInst(Instruction *Inst);
bool dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT);
@@ -442,6 +453,7 @@ char CodeGenPrepare::ID = 0;
INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE,
"Optimize for code generation", false, false)
+INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReader)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
@@ -473,8 +485,14 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
BPI.reset(new BranchProbabilityInfo(F, *LI));
BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ BBSectionsProfileReader =
+ getAnalysisIfAvailable<BasicBlockSectionsProfileReader>();
OptSize = F.hasOptSize();
- if (ProfileGuidedSectionPrefix) {
+ // Use the basic-block-sections profile to promote hot functions to
+ // .text.hot if requested.
+ if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader &&
+ BBSectionsProfileReader->isFunctionHot(F.getName())) {
+ F.setSectionPrefix("hot");
+ } else if (ProfileGuidedSectionPrefix) {
// The hot attribute overwrites profile count based hotness while profile
// counts based hotness overwrites the cold attribute.
// This is a conservative behavior.
@@ -524,7 +542,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// Split some critical edges where one of the sources is an indirect branch,
// to help generate sane code for PHIs involving such edges.
- EverMadeChange |= SplitIndirectBrCriticalEdges(F);
+ EverMadeChange |=
+ SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true);
bool MadeChange = true;
while (MadeChange) {
@@ -2037,7 +2056,8 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
return false;
// Bail if the value is never zero.
- if (llvm::isKnownNonZero(CountZeros->getOperand(0), *DL))
+ Use &Op = CountZeros->getOperandUse(0);
+ if (isKnownNonZero(Op, *DL))
return false;
// The intrinsic will be sunk behind a compare against zero and branch.
@@ -2058,7 +2078,10 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
// Replace the unconditional branch that was created by the first split with
// a compare against zero and a conditional branch.
Value *Zero = Constant::getNullValue(Ty);
- Value *Cmp = Builder.CreateICmpEQ(CountZeros->getOperand(0), Zero, "cmpz");
+ // Avoid introducing branch on poison. This also replaces the ctz operand.
+ if (!isGuaranteedNotToBeUndefOrPoison(Op))
+ Op = Builder.CreateFreeze(Op, Op->getName() + ".fr");
+ Value *Cmp = Builder.CreateICmpEQ(Op, Zero, "cmpz");
Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
StartBlock->getTerminator()->eraseFromParent();
@@ -2101,7 +2124,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
// Align the pointer arguments to this call if the target thinks it's a good
// idea
- unsigned MinSize, PrefAlign;
+ unsigned MinSize;
+ Align PrefAlign;
if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
for (auto &Arg : CI->args()) {
// We want to align both objects whose address is used directly and
@@ -2115,12 +2139,12 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
0);
Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
uint64_t Offset2 = Offset.getLimitedValue();
- if ((Offset2 & (PrefAlign-1)) != 0)
+ if (!isAligned(PrefAlign, Offset2))
continue;
AllocaInst *AI;
- if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign &&
+ if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlign() < PrefAlign &&
DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
- AI->setAlignment(Align(PrefAlign));
+ AI->setAlignment(PrefAlign);
// Global variables can only be aligned if they are defined in this
// object (i.e. they are uniquely initialized in this object), and
// over-aligning global variables that have an explicit section is
@@ -2130,7 +2154,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
GV->getPointerAlignment(*DL) < PrefAlign &&
DL->getTypeAllocSize(GV->getValueType()) >=
MinSize + Offset2)
- GV->setAlignment(MaybeAlign(PrefAlign));
+ GV->setAlignment(PrefAlign);
}
// If this is a memcpy (or similar) then we may be able to improve the
// alignment
@@ -3371,7 +3395,7 @@ public:
if (!Visited.insert(P).second)
continue;
if (auto *PI = dyn_cast<Instruction>(P))
- if (Value *V = SimplifyInstruction(cast<Instruction>(PI), SQ)) {
+ if (Value *V = simplifyInstruction(cast<Instruction>(PI), SQ)) {
for (auto *U : PI->users())
WorkList.push_back(cast<Value>(U));
Put(PI, V);
@@ -3416,7 +3440,7 @@ public:
void destroyNewNodes(Type *CommonType) {
// For safe erasing, replace the uses with dummy value first.
- auto *Dummy = UndefValue::get(CommonType);
+ auto *Dummy = PoisonValue::get(CommonType);
for (auto *I : AllPhiNodes) {
I->replaceAllUsesWith(Dummy);
I->eraseFromParent();
@@ -3785,7 +3809,7 @@ private:
SmallVector<Value *, 32> Worklist;
assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
"Address must be a Phi or Select node");
- auto *Dummy = UndefValue::get(CommonType);
+ auto *Dummy = PoisonValue::get(CommonType);
Worklist.push_back(Original);
while (!Worklist.empty()) {
Value *Current = Worklist.pop_back_val();
@@ -4550,9 +4574,9 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
if (!RHS || RHS->getBitWidth() > 64)
return false;
- int64_t Scale = RHS->getSExtValue();
- if (Opcode == Instruction::Shl)
- Scale = 1LL << Scale;
+ int64_t Scale = Opcode == Instruction::Shl
+ ? 1LL << RHS->getLimitedValue(RHS->getBitWidth() - 1)
+ : RHS->getSExtValue();
return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
}
@@ -4783,7 +4807,6 @@ bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
}
// It isn't profitable to do this, roll back.
- //cerr << "NOT FOLDING: " << *I;
AddrMode = BackupAddrMode;
AddrModeInsts.resize(OldSize);
TPT.rollback(LastKnownGood);
@@ -4836,7 +4859,7 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
TLI.ComputeConstraintToUse(OpInfo, SDValue());
// If this asm operand is our Value*, and if it isn't an indirect memory
- // operand, we can't fold it!
+ // operand, we can't fold it! TODO: Also handle C_Address?
if (OpInfo.CallOperandVal == OpVal &&
(OpInfo.ConstraintType != TargetLowering::C_Memory ||
!OpInfo.isIndirect))
@@ -5158,8 +5181,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// GEP, collect the GEP. Skip the GEPs that are the new bases of
// previously split data structures.
LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP);
- if (LargeOffsetGEPID.find(GEP) == LargeOffsetGEPID.end())
- LargeOffsetGEPID[GEP] = LargeOffsetGEPID.size();
+ LargeOffsetGEPID.insert(std::make_pair(GEP, LargeOffsetGEPID.size()));
}
NewAddrMode.OriginalValue = V;
@@ -5323,11 +5345,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// SDAG consecutive load/store merging.
if (ResultPtr->getType() != I8PtrTy)
ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
- ResultPtr =
- AddrMode.InBounds
- ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex,
- "sunkaddr")
- : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
+ ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex,
+ "sunkaddr", AddrMode.InBounds);
}
ResultIndex = V;
@@ -5338,11 +5357,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
} else {
if (ResultPtr->getType() != I8PtrTy)
ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
- SunkAddr =
- AddrMode.InBounds
- ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex,
- "sunkaddr")
- : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
+ SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr",
+ AddrMode.InBounds);
}
if (SunkAddr->getType() != Addr->getType())
@@ -5619,6 +5635,7 @@ bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
// Compute the constraint code and ConstraintType to use.
TLI->ComputeConstraintToUse(OpInfo, SDValue());
+ // TODO: Also handle C_Address?
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
OpInfo.isIndirect) {
Value *OpVal = CS->getArgOperand(ArgNo++);
@@ -6002,31 +6019,25 @@ bool CodeGenPrepare::optimizePhiType(
for (Value *V : Phi->incoming_values()) {
if (auto *OpPhi = dyn_cast<PHINode>(V)) {
if (!PhiNodes.count(OpPhi)) {
- if (Visited.count(OpPhi))
+ if (!Visited.insert(OpPhi).second)
return false;
PhiNodes.insert(OpPhi);
- Visited.insert(OpPhi);
Worklist.push_back(OpPhi);
}
} else if (auto *OpLoad = dyn_cast<LoadInst>(V)) {
if (!OpLoad->isSimple())
return false;
- if (!Defs.count(OpLoad)) {
- Defs.insert(OpLoad);
+ if (Defs.insert(OpLoad).second)
Worklist.push_back(OpLoad);
- }
} else if (auto *OpEx = dyn_cast<ExtractElementInst>(V)) {
- if (!Defs.count(OpEx)) {
- Defs.insert(OpEx);
+ if (Defs.insert(OpEx).second)
Worklist.push_back(OpEx);
- }
} else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
if (!ConvertTy)
ConvertTy = OpBC->getOperand(0)->getType();
if (OpBC->getOperand(0)->getType() != ConvertTy)
return false;
- if (!Defs.count(OpBC)) {
- Defs.insert(OpBC);
+ if (Defs.insert(OpBC).second) {
Worklist.push_back(OpBC);
AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) &&
!isa<ExtractElementInst>(OpBC->getOperand(0));
@@ -6127,7 +6138,7 @@ bool CodeGenPrepare::optimizePhiTypes(Function &F) {
// Remove any old phi's that have been converted.
for (auto *I : DeletedInstrs) {
- I->replaceAllUsesWith(UndefValue::get(I->getType()));
+ I->replaceAllUsesWith(PoisonValue::get(I->getType()));
I->eraseFromParent();
}
@@ -6979,12 +6990,12 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
return Changed;
}
-bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
+bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) {
Value *Cond = SI->getCondition();
Type *OldType = Cond->getType();
LLVMContext &Context = Cond->getContext();
EVT OldVT = TLI->getValueType(*DL, OldType);
- MVT RegType = TLI->getRegisterType(Context, OldVT);
+ MVT RegType = TLI->getPreferredSwitchConditionType(Context, OldVT);
unsigned RegWidth = RegType.getSizeInBits();
if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
@@ -7019,7 +7030,7 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
ExtInst->setDebugLoc(SI->getDebugLoc());
SI->setCondition(ExtInst);
for (auto Case : SI->cases()) {
- APInt NarrowConst = Case.getCaseValue()->getValue();
+ const APInt &NarrowConst = Case.getCaseValue()->getValue();
APInt WideConst = (ExtType == Instruction::ZExt) ?
NarrowConst.zext(RegWidth) : NarrowConst.sext(RegWidth);
Case.setValue(ConstantInt::get(Context, WideConst));
@@ -7028,6 +7039,89 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
return true;
}
+bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) {
+ // The SCCP optimization tends to produce code like this:
+ // switch(x) { case 42: phi(42, ...) }
+ // Materializing the constant for the phi-argument needs instructions, so we
+ // change the code to:
+ // switch(x) { case 42: phi(x, ...) }
+
+ Value *Condition = SI->getCondition();
+ // Avoid endless loop in degenerate case.
+ if (isa<ConstantInt>(*Condition))
+ return false;
+
+ bool Changed = false;
+ BasicBlock *SwitchBB = SI->getParent();
+ Type *ConditionType = Condition->getType();
+
+ for (const SwitchInst::CaseHandle &Case : SI->cases()) {
+ ConstantInt *CaseValue = Case.getCaseValue();
+ BasicBlock *CaseBB = Case.getCaseSuccessor();
+ // Set to true if we previously checked that `CaseBB` is only reached by
+ // a single case from this switch.
+ bool CheckedForSinglePred = false;
+ for (PHINode &PHI : CaseBB->phis()) {
+ Type *PHIType = PHI.getType();
+ // If ZExt is free then we can also catch patterns like this:
+ // switch((i32)x) { case 42: phi((i64)42, ...); }
+ // and replace `(i64)42` with `zext i32 %x to i64`.
+ bool TryZExt =
+ PHIType->isIntegerTy() &&
+ PHIType->getIntegerBitWidth() > ConditionType->getIntegerBitWidth() &&
+ TLI->isZExtFree(ConditionType, PHIType);
+ if (PHIType == ConditionType || TryZExt) {
+ // Set to true to skip this case because of multiple preds.
+ bool SkipCase = false;
+ Value *Replacement = nullptr;
+ for (unsigned I = 0, E = PHI.getNumIncomingValues(); I != E; I++) {
+ Value *PHIValue = PHI.getIncomingValue(I);
+ if (PHIValue != CaseValue) {
+ if (!TryZExt)
+ continue;
+ ConstantInt *PHIValueInt = dyn_cast<ConstantInt>(PHIValue);
+ if (!PHIValueInt ||
+ PHIValueInt->getValue() !=
+ CaseValue->getValue().zext(PHIType->getIntegerBitWidth()))
+ continue;
+ }
+ if (PHI.getIncomingBlock(I) != SwitchBB)
+ continue;
+ // We cannot optimize if there are multiple case labels jumping to
+ // this block. This check may get expensive when there are many
+ // case labels so we test for it last.
+ if (!CheckedForSinglePred) {
+ CheckedForSinglePred = true;
+ if (SI->findCaseDest(CaseBB) == nullptr) {
+ SkipCase = true;
+ break;
+ }
+ }
+
+ if (Replacement == nullptr) {
+ if (PHIValue == CaseValue) {
+ Replacement = Condition;
+ } else {
+ IRBuilder<> Builder(SI);
+ Replacement = Builder.CreateZExt(Condition, PHIType);
+ }
+ }
+ PHI.setIncomingValue(I, Replacement);
+ Changed = true;
+ }
+ if (SkipCase)
+ break;
+ }
+ }
+ }
+ return Changed;
+}
+
+bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
+ bool Changed = optimizeSwitchType(SI);
+ Changed |= optimizeSwitchPhiConstants(SI);
+ return Changed;
+}
namespace {
@@ -7777,7 +7871,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
// It is possible for very late stage optimizations (such as SimplifyCFG)
// to introduce PHI nodes too late to be cleaned up. If we detect such a
// trivial PHI, go ahead and zap it here.
- if (Value *V = SimplifyInstruction(P, {*DL, TLInfo})) {
+ if (Value *V = simplifyInstruction(P, {*DL, TLInfo})) {
LargeOffsetGEPMap.erase(P);
P->replaceAllUsesWith(V);
P->eraseFromParent();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
index 1d50e1d22b95..fd52191882cb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
@@ -13,7 +13,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/CommandFlags.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
+#include "llvm/MC/MCTargetOptionsCommandFlags.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Host.h"
@@ -58,6 +63,7 @@ CGOPT(bool, EnableUnsafeFPMath)
CGOPT(bool, EnableNoInfsFPMath)
CGOPT(bool, EnableNoNaNsFPMath)
CGOPT(bool, EnableNoSignedZerosFPMath)
+CGOPT(bool, EnableApproxFuncFPMath)
CGOPT(bool, EnableNoTrappingFPMath)
CGOPT(bool, EnableAIXExtendedAltivecABI)
CGOPT(DenormalMode::DenormalModeKind, DenormalFPMath)
@@ -73,6 +79,7 @@ CGOPT(bool, StackSymbolOrdering)
CGOPT(bool, StackRealign)
CGOPT(std::string, TrapFuncName)
CGOPT(bool, UseCtors)
+CGOPT(bool, LowerGlobalDtorsViaCxaAtExit)
CGOPT(bool, RelaxELFRelocations)
CGOPT_EXP(bool, DataSections)
CGOPT_EXP(bool, FunctionSections)
@@ -94,6 +101,7 @@ CGOPT(bool, ForceDwarfFrameSection)
CGOPT(bool, XRayOmitFunctionIndex)
CGOPT(bool, DebugStrictDwarf)
CGOPT(unsigned, AlignLoops)
+CGOPT(bool, JMCInstrument)
codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
#define CGBINDOPT(NAME) \
@@ -218,6 +226,12 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
cl::init(false));
CGBINDOPT(EnableNoSignedZerosFPMath);
+ static cl::opt<bool> EnableApproxFuncFPMath(
+ "enable-approx-func-fp-math",
+ cl::desc("Enable FP math optimizations that assume approx func"),
+ cl::init(false));
+ CGBINDOPT(EnableApproxFuncFPMath);
+
static cl::opt<bool> EnableNoTrappingFPMath(
"enable-no-trapping-fp-math",
cl::desc("Enable setting the FP exceptions build "
@@ -333,6 +347,12 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
cl::init(false));
CGBINDOPT(UseCtors);
+ static cl::opt<bool> LowerGlobalDtorsViaCxaAtExit(
+ "lower-global-dtors-via-cxa-atexit",
+ cl::desc("Lower llvm.global_dtors (global destructors) via __cxa_atexit"),
+ cl::init(true));
+ CGBINDOPT(LowerGlobalDtorsViaCxaAtExit);
+
static cl::opt<bool> RelaxELFRelocations(
"relax-elf-relocations",
cl::desc(
@@ -457,6 +477,12 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
cl::desc("Default alignment for loops"));
CGBINDOPT(AlignLoops);
+ static cl::opt<bool> JMCInstrument(
+ "enable-jmc-instrument",
+ cl::desc(
+ "Instrument functions with a call to __CheckForDebuggerJustMyCode"),
+ cl::init(false));
+ CGBINDOPT(JMCInstrument);
+
#undef CGBINDOPT
mc::RegisterMCTargetOptionsFlags();
@@ -493,6 +519,7 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
Options.NoInfsFPMath = getEnableNoInfsFPMath();
Options.NoNaNsFPMath = getEnableNoNaNsFPMath();
Options.NoSignedZerosFPMath = getEnableNoSignedZerosFPMath();
+ Options.ApproxFuncFPMath = getEnableApproxFuncFPMath();
Options.NoTrappingFPMath = getEnableNoTrappingFPMath();
DenormalMode::DenormalModeKind DenormKind = getDenormalFPMath();
@@ -509,9 +536,10 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
Options.GuaranteedTailCallOpt = getEnableGuaranteedTailCallOpt();
Options.StackSymbolOrdering = getStackSymbolOrdering();
Options.UseInitArray = !getUseCtors();
+ Options.LowerGlobalDtorsViaCxaAtExit = getLowerGlobalDtorsViaCxaAtExit();
Options.RelaxELFRelocations = getRelaxELFRelocations();
Options.DataSections =
- getExplicitDataSections().getValueOr(TheTriple.hasDefaultDataSections());
+ getExplicitDataSections().value_or(TheTriple.hasDefaultDataSections());
Options.FunctionSections = getFunctionSections();
Options.IgnoreXCOFFVisibility = getIgnoreXCOFFVisibility();
Options.XCOFFTracebackTable = getXCOFFTracebackTable();
@@ -531,6 +559,7 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
Options.XRayOmitFunctionIndex = getXRayOmitFunctionIndex();
Options.DebugStrictDwarf = getDebugStrictDwarf();
Options.LoopAlignment = getAlignLoops();
+ Options.JMCInstrument = getJMCInstrument();
Options.MCOptions = mc::InitMCTargetOptionsFromFlags();
@@ -643,6 +672,7 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
HANDLE_BOOL_ATTR(EnableNoInfsFPMathView, "no-infs-fp-math");
HANDLE_BOOL_ATTR(EnableNoNaNsFPMathView, "no-nans-fp-math");
HANDLE_BOOL_ATTR(EnableNoSignedZerosFPMathView, "no-signed-zeros-fp-math");
+ HANDLE_BOOL_ATTR(EnableApproxFuncFPMathView, "approx-func-fp-math");
if (DenormalFPMathView->getNumOccurrences() > 0 &&
!F.hasFnAttribute("denormal-fp-math")) {
@@ -684,4 +714,3 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
for (Function &F : M)
setFunctionAttributes(CPU, Features, F);
}
-
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DFAPacketizer.cpp
index d38bacdb1aa7..42192f41dbda 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/DFAPacketizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DFAPacketizer.cpp
@@ -30,10 +30,10 @@
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index 5579152f1ce0..ce00be634e9a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -14,7 +14,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DetectDeadLanes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DetectDeadLanes.cpp
index 1337e57f360b..565c8b405f82 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/DetectDeadLanes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DetectDeadLanes.cpp
@@ -28,12 +28,9 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/PassRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <deque>
@@ -93,7 +90,7 @@ private:
LaneBitmask transferUsedLanes(const MachineInstr &MI, LaneBitmask UsedLanes,
const MachineOperand &MO) const;
- bool runOnce(MachineFunction &MF);
+ std::pair<bool, bool> runOnce(MachineFunction &MF);
LaneBitmask determineInitialDefinedLanes(unsigned Reg);
LaneBitmask determineInitialUsedLanes(unsigned Reg);
@@ -487,7 +484,7 @@ bool DetectDeadLanes::isUndefInput(const MachineOperand &MO,
return true;
}
-bool DetectDeadLanes::runOnce(MachineFunction &MF) {
+std::pair<bool, bool> DetectDeadLanes::runOnce(MachineFunction &MF) {
// First pass: Populate defs/uses of vregs with initial values
unsigned NumVirtRegs = MRI->getNumVirtRegs();
for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) {
@@ -528,6 +525,7 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) {
dbgs() << "\n";
});
+ bool Changed = false;
bool Again = false;
// Mark operands as dead/unused.
for (MachineBasicBlock &MBB : MF) {
@@ -544,6 +542,7 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) {
LLVM_DEBUG(dbgs()
<< "Marking operand '" << MO << "' as dead in " << MI);
MO.setIsDead();
+ Changed = true;
}
if (MO.readsReg()) {
bool CrossCopy = false;
@@ -551,10 +550,12 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) {
LLVM_DEBUG(dbgs()
<< "Marking operand '" << MO << "' as undef in " << MI);
MO.setIsUndef();
+ Changed = true;
} else if (isUndefInput(MO, &CrossCopy)) {
LLVM_DEBUG(dbgs()
<< "Marking operand '" << MO << "' as undef in " << MI);
MO.setIsUndef();
+ Changed = true;
if (CrossCopy)
Again = true;
}
@@ -563,7 +564,7 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) {
}
}
- return Again;
+ return std::make_pair(Changed, Again);
}
bool DetectDeadLanes::runOnMachineFunction(MachineFunction &MF) {
@@ -585,13 +586,16 @@ bool DetectDeadLanes::runOnMachineFunction(MachineFunction &MF) {
WorklistMembers.resize(NumVirtRegs);
DefinedByCopy.resize(NumVirtRegs);
+ bool Changed = false;
bool Again;
do {
- Again = runOnce(MF);
+ bool LocalChanged;
+ std::tie(LocalChanged, Again) = runOnce(MF);
+ Changed |= LocalChanged;
} while(Again);
DefinedByCopy.clear();
WorklistMembers.clear();
delete[] VRegInfos;
- return true;
+ return Changed;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/EHContGuardCatchret.cpp b/contrib/llvm-project/llvm/lib/CodeGen/EHContGuardCatchret.cpp
index c18532946bf9..b26aa792bb93 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/EHContGuardCatchret.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/EHContGuardCatchret.cpp
@@ -17,9 +17,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/InitializePasses.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
index 6a0da4dad3c1..32858d043383 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -17,10 +17,10 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -30,7 +30,6 @@
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -664,8 +663,8 @@ void SSAIfConv::rewritePHIOperands() {
PI.PHI->getOperand(i-1).setMBB(Head);
PI.PHI->getOperand(i-2).setReg(DstReg);
} else if (MBB == getFPred()) {
- PI.PHI->RemoveOperand(i-1);
- PI.PHI->RemoveOperand(i-2);
+ PI.PHI->removeOperand(i-1);
+ PI.PHI->removeOperand(i-2);
}
}
LLVM_DEBUG(dbgs() << " --> " << *PI.PHI);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp
index 60ee1812ee2c..b2639636dda7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -19,7 +19,6 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Dominators.h"
@@ -32,6 +31,10 @@
using namespace llvm;
+namespace llvm {
+class TargetLowering;
+}
+
#define DEBUG_TYPE "expandmemcmp"
STATISTIC(NumMemCmpCalls, "Number of memcmp calls");
@@ -737,7 +740,7 @@ Value *MemCmpExpansion::getMemCmpExpansion() {
static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
const TargetLowering *TLI, const DataLayout *DL,
ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
- DomTreeUpdater *DTU) {
+ DomTreeUpdater *DTU, const bool IsBCmp) {
NumMemCmpCalls++;
// Early exit from expansion if -Oz.
@@ -757,7 +760,8 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
}
// TTI call to check if target would like to expand memcmp. Also, get the
// available load sizes.
- const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
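+ // bcmp only reports whether the two buffers are equal, so it can always use
+ // the cheaper zero-equality expansion, regardless of how its result is used.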
+ const bool IsUsedForZeroCmp =
+ IsBCmp || isOnlyUsedInZeroEqualityComparison(CI);
bool OptForSize = CI->getFunction()->hasOptSize() ||
llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
auto Options = TTI->enableMemCmpExpansion(OptForSize,
@@ -861,7 +865,7 @@ bool ExpandMemCmpPass::runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
LibFunc Func;
if (TLI->getLibFunc(*CI, Func) &&
(Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
- expandMemCmp(CI, TTI, TL, &DL, PSI, BFI, DTU)) {
+ expandMemCmp(CI, TTI, TL, &DL, PSI, BFI, DTU, Func == LibFunc_bcmp)) {
return true;
}
}
@@ -881,7 +885,7 @@ ExpandMemCmpPass::runImpl(Function &F, const TargetLibraryInfo *TLI,
bool MadeChanges = false;
for (auto BBIt = F.begin(); BBIt != F.end();) {
if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI,
- DTU.hasValue() ? DTU.getPointer() : nullptr)) {
+ DTU ? DTU.getPointer() : nullptr)) {
MadeChanges = true;
// If changes were made, restart the function from the beginning, since
// the structure of the function was changed.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
index d9caa8ad42d0..086b4a4dcc47 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -13,8 +13,6 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -104,8 +102,8 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
if (MI->allDefsAreDead()) {
MI->setDesc(TII->get(TargetOpcode::KILL));
- MI->RemoveOperand(3); // SubIdx
- MI->RemoveOperand(1); // Imm
+ MI->removeOperand(3); // SubIdx
+ MI->removeOperand(1); // Imm
LLVM_DEBUG(dbgs() << "subreg: replaced by: " << *MI);
return true;
}
@@ -117,8 +115,8 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
// We must leave %rax live.
if (DstReg != InsReg) {
MI->setDesc(TII->get(TargetOpcode::KILL));
- MI->RemoveOperand(3); // SubIdx
- MI->RemoveOperand(1); // Imm
+ MI->removeOperand(3); // SubIdx
+ MI->removeOperand(1); // Imm
LLVM_DEBUG(dbgs() << "subreg: replace by: " << *MI);
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp
index 2bcaf750911b..f08c47d220ea 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp
@@ -14,12 +14,10 @@
#include "llvm/CodeGen/ExpandReductions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp
index bb8d2b3e9a78..7883a48d121c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp
@@ -23,13 +23,11 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
using namespace llvm;
@@ -115,6 +113,17 @@ static void replaceOperation(Value &NewOp, VPIntrinsic &OldOp) {
OldOp.eraseFromParent();
}
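+
+// Return true if it is safe to speculate over the inactive lanes of \p VPI,
+// i.e. to evaluate it as if %mask and %evl were ignored. VP reductions never
+// qualify, since their result depends on both; everything else defers to the
+// speculatability of the matching unpredicated opcode.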
+static bool maySpeculateLanes(VPIntrinsic &VPI) {
+ // The result of VP reductions depends on the mask and evl.
+ if (isa<VPReductionIntrinsic>(VPI))
+ return false;
+ // Fallback to whether the intrinsic is speculatable.
+ Optional<unsigned> OpcOpt = VPI.getFunctionalOpcode();
+ unsigned FunctionalOpc = OpcOpt.value_or((unsigned)Instruction::Call);
+ return isSafeToSpeculativelyExecuteWithOpcode(FunctionalOpc,
+ cast<Operator>(&VPI));
+}
+
//// } Helpers
namespace {
@@ -218,8 +227,7 @@ Value *CachingVPExpander::convertEVLToMask(IRBuilder<> &Builder,
Value *
CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder,
VPIntrinsic &VPI) {
- assert((isSafeToSpeculativelyExecute(&VPI) ||
- VPI.canIgnoreVectorLengthParam()) &&
+ assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
"Implicitly dropping %evl in non-speculatable operator!");
auto OC = static_cast<Instruction::BinaryOps>(*VPI.getFunctionalOpcode());
@@ -298,8 +306,7 @@ static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI,
Value *
CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder,
VPReductionIntrinsic &VPI) {
- assert((isSafeToSpeculativelyExecute(&VPI) ||
- VPI.canIgnoreVectorLengthParam()) &&
+ assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
"Implicitly dropping %evl in non-speculatable operator!");
Value *Mask = VPI.getMaskParam();
@@ -473,9 +480,9 @@ struct TransformJob {
bool isDone() const { return Strategy.shouldDoNothing(); }
};
-void sanitizeStrategy(Instruction &I, VPLegalization &LegalizeStrat) {
- // Speculatable instructions do not strictly need predication.
- if (isSafeToSpeculativelyExecute(&I)) {
+void sanitizeStrategy(VPIntrinsic &VPI, VPLegalization &LegalizeStrat) {
+ // Operations with speculatable lanes do not strictly need predication.
+ if (maySpeculateLanes(VPI)) {
// Converting a speculatable VP intrinsic means dropping %mask and %evl.
// No need to expand %evl into the %mask only to ignore that code.
if (LegalizeStrat.OpStrategy == VPLegalization::Convert)
@@ -520,7 +527,7 @@ bool CachingVPExpander::expandVectorPredication() {
if (!VPI)
continue;
auto VPStrat = getVPLegalizationStrategy(*VPI);
- sanitizeStrategy(I, VPStrat);
+ sanitizeStrategy(*VPI, VPStrat);
if (!VPStrat.shouldDoNothing())
Worklist.emplace_back(VPI, VPStrat);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FEntryInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FEntryInserter.cpp
index c2194929e2e7..68304dd41db0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/FEntryInserter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/FEntryInserter.cpp
@@ -13,12 +13,9 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FaultMaps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FaultMaps.cpp
index 1d35b194f218..3ec666227651 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/FaultMaps.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/FaultMaps.cpp
@@ -52,7 +52,7 @@ void FaultMaps::serializeToFaultMapSection() {
// Create the section.
MCSection *FaultMapSection =
OutContext.getObjectFileInfo()->getFaultMapSection();
- OS.SwitchSection(FaultMapSection);
+ OS.switchSection(FaultMapSection);
// Emit a dummy symbol to force section inclusion.
OS.emitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_FaultMaps")));
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FinalizeISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FinalizeISel.cpp
index 00040e92a829..329c9587e321 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/FinalizeISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/FinalizeISel.cpp
@@ -16,11 +16,9 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Support/Debug.h"
using namespace llvm;
#define DEBUG_TYPE "finalize-isel"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
index ec6bf18b2769..252910fd9462 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
@@ -24,10 +24,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/StackMaps.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/InitializePasses.h"
@@ -156,12 +153,17 @@ static Register performCopyPropagation(Register Reg,
RI = ++MachineBasicBlock::iterator(Def);
IsKill = DestSrc->Source->isKill();
- // There are no uses of original register between COPY and STATEPOINT.
- // There can't be any after STATEPOINT, so we can eliminate Def.
if (!Use) {
+ // There are no uses of original register between COPY and STATEPOINT.
+ // There can't be any after STATEPOINT, so we can eliminate Def.
LLVM_DEBUG(dbgs() << "spillRegisters: removing dead copy " << *Def);
Def->eraseFromParent();
+ } else if (IsKill) {
+ // The COPY will remain in place, and the spill will be inserted *after*
+ // it, so the COPY is no longer a kill of the source register.
+ const_cast<MachineOperand *>(DestSrc->Source)->setIsKill(false);
}
+
return SrcReg;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp
index af5515cc6bfd..4d27143c5298 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp
@@ -11,16 +11,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GCMetadata.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Pass.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
#include <cassert>
#include <memory>
#include <string>
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp
index 637a877810a1..80feb0045406 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp
@@ -14,7 +14,6 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -24,9 +23,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/MC/MCContext.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
index f9bfe8518083..ac140e745600 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
@@ -67,7 +67,8 @@ bool CSEConfigFull::shouldCSEOpc(unsigned Opc) {
}
bool CSEConfigConstantOnly::shouldCSEOpc(unsigned Opc) {
- return Opc == TargetOpcode::G_CONSTANT || Opc == TargetOpcode::G_IMPLICIT_DEF;
+ return Opc == TargetOpcode::G_CONSTANT || Opc == TargetOpcode::G_FCONSTANT ||
+ Opc == TargetOpcode::G_IMPLICIT_DEF;
}
std::unique_ptr<CSEConfigBase>
@@ -88,7 +89,7 @@ void GISelCSEInfo::setMF(MachineFunction &MF) {
this->MRI = &MF.getRegInfo();
}
-GISelCSEInfo::~GISelCSEInfo() {}
+GISelCSEInfo::~GISelCSEInfo() = default;
bool GISelCSEInfo::isUniqueMachineInstValid(
const UniqueMachineInstr &UMI) const {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
index 1a642e233a6a..a432e4ed7fb7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -12,6 +12,7 @@
//
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -174,6 +175,7 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
default:
break;
case TargetOpcode::G_ADD:
+ case TargetOpcode::G_PTR_ADD:
case TargetOpcode::G_AND:
case TargetOpcode::G_ASHR:
case TargetOpcode::G_LSHR:
@@ -185,23 +187,54 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
case TargetOpcode::G_UDIV:
case TargetOpcode::G_SDIV:
case TargetOpcode::G_UREM:
- case TargetOpcode::G_SREM: {
+ case TargetOpcode::G_SREM:
+ case TargetOpcode::G_SMIN:
+ case TargetOpcode::G_SMAX:
+ case TargetOpcode::G_UMIN:
+ case TargetOpcode::G_UMAX: {
// Try to constant fold these.
assert(SrcOps.size() == 2 && "Invalid sources");
assert(DstOps.size() == 1 && "Invalid dsts");
- if (SrcOps[0].getLLTTy(*getMRI()).isVector()) {
+ LLT SrcTy = SrcOps[0].getLLTTy(*getMRI());
+
+ if (Opc == TargetOpcode::G_PTR_ADD &&
+ getDataLayout().isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
+ break;
+
+ if (SrcTy.isVector()) {
// Try to constant fold vector constants.
- Register VecCst = ConstantFoldVectorBinop(
- Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI(), *this);
- if (VecCst)
- return buildCopy(DstOps[0], VecCst);
+ SmallVector<APInt> VecCst = ConstantFoldVectorBinop(
+ Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI());
+ if (!VecCst.empty())
+ return buildBuildVectorConstant(DstOps[0], VecCst);
break;
}
+
if (Optional<APInt> Cst = ConstantFoldBinOp(Opc, SrcOps[0].getReg(),
SrcOps[1].getReg(), *getMRI()))
return buildConstant(DstOps[0], *Cst);
break;
}
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FDIV:
+ case TargetOpcode::G_FREM:
+ case TargetOpcode::G_FMINNUM:
+ case TargetOpcode::G_FMAXNUM:
+ case TargetOpcode::G_FMINNUM_IEEE:
+ case TargetOpcode::G_FMAXNUM_IEEE:
+ case TargetOpcode::G_FMINIMUM:
+ case TargetOpcode::G_FMAXIMUM:
+ case TargetOpcode::G_FCOPYSIGN: {
+ // Try to constant fold these.
+ assert(SrcOps.size() == 2 && "Invalid sources");
+ assert(DstOps.size() == 1 && "Invalid dsts");
+ if (Optional<APFloat> Cst = ConstantFoldFPBinOp(
+ Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI()))
+ return buildFConstant(DstOps[0], *Cst);
+ break;
+ }
case TargetOpcode::G_SEXT_INREG: {
assert(DstOps.size() == 1 && "Invalid dst ops");
assert(SrcOps.size() == 2 && "Invalid src ops");
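For reference, a minimal sketch of the constant folding this hunk enables for the new min/max opcodes, in plain C++ with local stand-ins (Opc, foldMinMax) rather than LLVM's TargetOpcode/APInt machinery:

    // Fold a min/max opcode over two known 64-bit constants, mirroring the
    // G_SMIN/G_SMAX/G_UMIN/G_UMAX cases added to CSEMIRBuilder::buildInstr.
    #include <algorithm>
    #include <cstdint>
    #include <optional>

    enum class Opc { SMin, SMax, UMin, UMax };

    std::optional<int64_t> foldMinMax(Opc Op, int64_t L, int64_t R) {
      switch (Op) {
      case Opc::SMin: return std::min(L, R);
      case Opc::SMax: return std::max(L, R);
      case Opc::UMin: return int64_t(std::min(uint64_t(L), uint64_t(R)));
      case Opc::UMax: return int64_t(std::max(uint64_t(L), uint64_t(R)));
      }
      return std::nullopt; // unreachable for the four opcodes above
    }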
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 1ec7868f2234..081c8b125f17 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -11,16 +11,16 @@
///
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/GlobalISel/CallLowering.h"
-#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Target/TargetMachine.h"
@@ -698,10 +698,12 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
ValTy, extendOpFromFlags(Args[i].Flags[0]));
}
+ bool BigEndianPartOrdering = TLI->hasBigEndianPartOrdering(OrigVT, DL);
for (unsigned Part = 0; Part < NumParts; ++Part) {
Register ArgReg = Args[i].Regs[Part];
// There should be Regs.size() ArgLocs per argument.
- VA = ArgLocs[j + Part];
+ unsigned Idx = BigEndianPartOrdering ? NumParts - 1 - Part : Part;
+ CCValAssign &VA = ArgLocs[j + Idx];
const ISD::ArgFlagsTy Flags = Args[i].Flags[Part];
if (VA.isMemLoc() && !Flags.isByVal()) {
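A quick illustration of the part reordering introduced here, with the target query replaced by a hard-coded flag (BigEndianPartOrdering is a demo assumption, not a call into TLI):

    // Map each register part of a split argument to its CCValAssign slot.
    // Big-endian part ordering walks the ArgLocs back to front.
    #include <cstdio>

    int main() {
      const unsigned NumParts = 4;
      const bool BigEndianPartOrdering = true; // demo assumption
      for (unsigned Part = 0; Part < NumParts; ++Part) {
        unsigned Idx = BigEndianPartOrdering ? NumParts - 1 - Part : Part;
        std::printf("part %u -> ArgLoc %u\n", Part, Idx); // 0->3, 1->2, ...
      }
    }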
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
index 30f8838805b5..1a5fe3e84c17 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -13,14 +13,13 @@
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
-#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelWorkList.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "gi-combiner"
@@ -57,8 +56,7 @@ class WorkListMaintainer : public GISelChangeObserver {
public:
WorkListMaintainer(WorkListTy &WorkList) : WorkList(WorkList) {}
- virtual ~WorkListMaintainer() {
- }
+ virtual ~WorkListMaintainer() = default;
void erasingInstr(MachineInstr &MI) override {
LLVM_DEBUG(dbgs() << "Erasing: " << MI << "\n");
@@ -115,7 +113,7 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF,
bool MFChanged = false;
bool Changed;
- MachineIRBuilder &B = *Builder.get();
+ MachineIRBuilder &B = *Builder;
do {
// Collect all instructions. Do a post order traversal for basic blocks and
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index d6a009744161..2c94f87804ac 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -8,7 +8,6 @@
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
-#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
@@ -16,23 +15,22 @@
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/DivisionByConstantInfo.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
#include <tuple>
#define DEBUG_TYPE "gi-combiner"
@@ -131,9 +129,27 @@ isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
return BigEndian;
}
+bool CombinerHelper::isPreLegalize() const { return !LI; }
+
+bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
+ assert(LI && "Must have LegalizerInfo to query isLegal!");
+ return LI->getAction(Query).Action == LegalizeActions::Legal;
+}
+
bool CombinerHelper::isLegalOrBeforeLegalizer(
const LegalityQuery &Query) const {
- return !LI || LI->getAction(Query).Action == LegalizeActions::Legal;
+ return isPreLegalize() || isLegal(Query);
+}
+
+bool CombinerHelper::isConstantLegalOrBeforeLegalizer(const LLT Ty) const {
+ if (!Ty.isVector())
+ return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
+ // Vector constants are represented as a G_BUILD_VECTOR of scalar G_CONSTANTs.
+ if (isPreLegalize())
+ return true;
+ LLT EltTy = Ty.getElementType();
+ return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
+ isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
}
void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
@@ -1275,12 +1291,12 @@ bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI,
Register SrcReg = MI.getOperand(1).getReg();
LLT DstTy = MRI.getType(DstReg);
Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI);
- return Cst.hasValue();
+ return Cst.has_value();
}
void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
Optional<APFloat> &Cst) {
- assert(Cst.hasValue() && "Optional is unexpectedly empty!");
+ assert(Cst && "Optional is unexpectedly empty!");
Builder.setInstrAndDebugLoc(MI);
MachineFunction &MF = Builder.getMF();
auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst);
@@ -2350,6 +2366,19 @@ bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr))
return false;
+ // If both instructions are loads or stores, they are equal only if both
+ // are dereferenceable invariant loads with the same number of bits.
+ if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
+ GLoadStore *LS1 = dyn_cast<GLoadStore>(I1);
+ GLoadStore *LS2 = dyn_cast<GLoadStore>(I2);
+ if (!LS1 || !LS2)
+ return false;
+
+ if (!I2->isDereferenceableInvariantLoad(nullptr) ||
+ (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
+ return false;
+ }
+
// Check for physical registers on the instructions first to avoid cases
// like this:
//
@@ -2397,7 +2426,7 @@ bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
return false;
auto *MI = MRI.getVRegDef(MOP.getReg());
auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
- return MaybeCst.hasValue() && MaybeCst->getBitWidth() <= 64 &&
+ return MaybeCst && MaybeCst->getBitWidth() <= 64 &&
MaybeCst->getSExtValue() == C;
}
@@ -2916,7 +2945,7 @@ bool CombinerHelper::matchNotCmp(MachineInstr &MI,
int64_t Cst;
if (Ty.isVector()) {
MachineInstr *CstDef = MRI.getVRegDef(CstReg);
- auto MaybeCst = getBuildVectorConstantSplat(*CstDef, MRI);
+ auto MaybeCst = getIConstantSplatSExtVal(*CstDef, MRI);
if (!MaybeCst)
return false;
if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
@@ -3049,6 +3078,102 @@ void CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) {
MI.eraseFromParent();
}
+bool CombinerHelper::matchFoldBinOpIntoSelect(MachineInstr &MI,
+ unsigned &SelectOpNo) {
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+
+ Register OtherOperandReg = RHS;
+ SelectOpNo = 1;
+ MachineInstr *Select = MRI.getVRegDef(LHS);
+
+ // Don't do this unless the old select is going away. We want to eliminate the
+ // binary operator, not replace a binop with a select.
+ if (Select->getOpcode() != TargetOpcode::G_SELECT ||
+ !MRI.hasOneNonDBGUse(LHS)) {
+ OtherOperandReg = LHS;
+ SelectOpNo = 2;
+ Select = MRI.getVRegDef(RHS);
+ if (Select->getOpcode() != TargetOpcode::G_SELECT ||
+ !MRI.hasOneNonDBGUse(RHS))
+ return false;
+ }
+
+ MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
+ MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());
+
+ if (!isConstantOrConstantVector(*SelectLHS, MRI,
+ /*AllowFP*/ true,
+ /*AllowOpaqueConstants*/ false))
+ return false;
+ if (!isConstantOrConstantVector(*SelectRHS, MRI,
+ /*AllowFP*/ true,
+ /*AllowOpaqueConstants*/ false))
+ return false;
+
+ unsigned BinOpcode = MI.getOpcode();
+
+ // We now know one of the operands is a select of constants. Verify that
+ // the other binary operator operand is either a constant, or we can handle a
+ // variable.
+ bool CanFoldNonConst =
+ (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
+ (isNullOrNullSplat(*SelectLHS, MRI) ||
+ isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
+ (isNullOrNullSplat(*SelectRHS, MRI) ||
+ isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
+ if (CanFoldNonConst)
+ return true;
+
+ return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
+ /*AllowFP*/ true,
+ /*AllowOpaqueConstants*/ false);
+}
+
+/// \p SelectOperand is the operand in binary operator \p MI that is the select
+/// to fold.
+bool CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI,
+ const unsigned &SelectOperand) {
+ Builder.setInstrAndDebugLoc(MI);
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());
+
+ Register SelectCond = Select->getOperand(1).getReg();
+ Register SelectTrue = Select->getOperand(2).getReg();
+ Register SelectFalse = Select->getOperand(3).getReg();
+
+ LLT Ty = MRI.getType(Dst);
+ unsigned BinOpcode = MI.getOpcode();
+
+ Register FoldTrue, FoldFalse;
+
+ // We have a select-of-constants followed by a binary operator with a
+ // constant. Eliminate the binop by pulling the constant math into the select.
+ // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
+ if (SelectOperand == 1) {
+ // TODO: SelectionDAG verifies this actually constant folds before
+ // committing to the combine.
+
+ FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
+ FoldFalse =
+ Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
+ } else {
+ FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
+ FoldFalse =
+ Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
+ }
+
+ Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
+ Observer.erasingInstr(*Select);
+ Select->eraseFromParent();
+ MI.eraseFromParent();
+
+ return true;
+}
+
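The combine rests on a simple distributive identity; a self-contained check in plain C++ (sel is a local stand-in for G_SELECT, not LLVM API):

    // op (select c, CT, CF), CBO == select c, (op CT, CBO), (op CF, CBO)
    #include <cassert>
    #include <cstdint>

    int64_t sel(bool C, int64_t T, int64_t F) { return C ? T : F; }

    int main() {
      const int64_t CT = 7, CF = -3, CBO = 10;
      for (bool C : {false, true})
        assert(sel(C, CT, CF) + CBO == sel(C, CT + CBO, CF + CBO));
    }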
Optional<SmallVector<Register, 8>>
CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
@@ -3340,7 +3465,7 @@ bool CombinerHelper::matchLoadOrCombine(
// BSWAP.
bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
Optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
- if (!IsBigEndian.hasValue())
+ if (!IsBigEndian)
return false;
bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
@@ -3848,7 +3973,7 @@ bool CombinerHelper::matchExtractAllEltsFromBuildVector(
auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
if (!Cst)
return false;
- unsigned Idx = Cst.getValue().getZExtValue();
+ unsigned Idx = Cst->getZExtValue();
if (Idx >= NumElts)
return false; // Out of range.
ExtractedElts.set(Idx);
@@ -3904,10 +4029,9 @@ bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI,
// Given constants C0 and C1 such that C0 + C1 is bit-width:
// (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
- // TODO: Match constant splat.
int64_t CstShlAmt, CstLShrAmt;
- if (mi_match(ShlAmt, MRI, m_ICst(CstShlAmt)) &&
- mi_match(LShrAmt, MRI, m_ICst(CstLShrAmt)) &&
+ if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
+ mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
CstShlAmt + CstLShrAmt == BitWidth) {
FshOpc = TargetOpcode::G_FSHR;
Amt = LShrAmt;
@@ -3958,7 +4082,7 @@ void CombinerHelper::applyFunnelShiftToRotate(MachineInstr &MI) {
Observer.changingInstr(MI);
MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
: TargetOpcode::G_ROTR));
- MI.RemoveOperand(2);
+ MI.removeOperand(2);
Observer.changedInstr(MI);
}
@@ -4100,18 +4224,23 @@ bool CombinerHelper::matchAndOrDisjointMask(
return false;
Register Src;
- int64_t MaskAnd;
- int64_t MaskOr;
+ Register AndMaskReg;
+ int64_t AndMaskBits;
+ int64_t OrMaskBits;
if (!mi_match(MI, MRI,
- m_GAnd(m_GOr(m_Reg(Src), m_ICst(MaskOr)), m_ICst(MaskAnd))))
+ m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
+ m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
return false;
- // Check if MaskOr could turn on any bits in Src.
- if (MaskAnd & MaskOr)
+ // Check if OrMask could turn on any bits in Src.
+ if (AndMaskBits & OrMaskBits)
return false;
MatchInfo = [=, &MI](MachineIRBuilder &B) {
Observer.changingInstr(MI);
+ // Canonicalize the result to have the constant on the RHS.
+ if (MI.getOperand(1).getReg() == AndMaskReg)
+ MI.getOperand(2).setReg(AndMaskReg);
MI.getOperand(1).setReg(Src);
Observer.changedInstr(MI);
};
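Why the rewrite is sound: when AndMaskBits & OrMaskBits == 0, no bit the OR can set survives the AND, so the OR is dead. An exhaustive 8-bit check (local sketch, not combiner code):

    #include <cassert>

    int main() {
      const unsigned OrMask = 0x0F, AndMask = 0xF0; // disjoint masks
      static_assert((OrMask & AndMask) == 0, "masks must be disjoint");
      for (unsigned x = 0; x < 256; ++x)
        assert(((x | OrMask) & AndMask) == (x & AndMask));
    }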
@@ -4259,6 +4388,14 @@ bool CombinerHelper::matchBitfieldExtractFromShrAnd(
if (ShrAmt < 0 || ShrAmt >= Size)
return false;
+ // If the shift subsumes the mask, emit the 0 directly.
+ if (0 == (SMask >> ShrAmt)) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildConstant(Dst, 0);
+ };
+ return true;
+ }
+
// Check that ubfx can do the extraction, with no holes in the mask.
uint64_t UMask = SMask;
UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
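The early-out added above relies on every set bit of SMask sitting below ShrAmt, so the shift discards the whole masked value. A small standalone check with fixed example constants:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t SMask = 0x7; // bits 0..2 only
      const unsigned ShrAmt = 3;  // shifts all mask bits out
      assert((SMask >> ShrAmt) == 0);
      for (uint64_t x : {0ull, 1ull, 0xFFull, ~0ull})
        assert(((x & SMask) >> ShrAmt) == 0); // always folds to 0
    }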
@@ -4585,6 +4722,42 @@ bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) {
return true;
}
+bool CombinerHelper::matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) {
+ // (G_*MULO x, 0) -> 0 + no carry out
+ assert(MI.getOpcode() == TargetOpcode::G_UMULO ||
+ MI.getOpcode() == TargetOpcode::G_SMULO);
+ if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
+ return false;
+ Register Dst = MI.getOperand(0).getReg();
+ Register Carry = MI.getOperand(1).getReg();
+ if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Dst)) ||
+ !isConstantLegalOrBeforeLegalizer(MRI.getType(Carry)))
+ return false;
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildConstant(Dst, 0);
+ B.buildConstant(Carry, 0);
+ };
+ return true;
+}
+
+bool CombinerHelper::matchAddOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) {
+ // (G_*ADDO x, 0) -> x + no carry out
+ assert(MI.getOpcode() == TargetOpcode::G_UADDO ||
+ MI.getOpcode() == TargetOpcode::G_SADDO);
+ if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
+ return false;
+ Register Carry = MI.getOperand(1).getReg();
+ if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Carry)))
+ return false;
+ Register Dst = MI.getOperand(0).getReg();
+ Register LHS = MI.getOperand(2).getReg();
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildCopy(Dst, LHS);
+ B.buildConstant(Carry, 0);
+ };
+ return true;
+}
+
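Both new combines encode facts that hold at any width: multiplying by zero yields zero with no overflow, and adding zero yields the operand with no overflow. Checked here via the GCC/Clang overflow builtins (a demo convenience; the combiner itself just emits constants):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int32_t x : {0, 1, -1, INT32_MAX, INT32_MIN}) {
        int32_t Res;
        assert(!__builtin_mul_overflow(x, 0, &Res) && Res == 0); // G_*MULO x, 0
        assert(!__builtin_add_overflow(x, 0, &Res) && Res == x); // G_*ADDO x, 0
      }
    }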
MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_UDIV);
auto &UDiv = cast<GenericMachineInstr>(MI);
@@ -5376,6 +5549,106 @@ bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA(
return false;
}
+bool CombinerHelper::matchSelectToLogical(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ GSelect &Sel = cast<GSelect>(MI);
+ Register DstReg = Sel.getReg(0);
+ Register Cond = Sel.getCondReg();
+ Register TrueReg = Sel.getTrueReg();
+ Register FalseReg = Sel.getFalseReg();
+
+ auto *TrueDef = getDefIgnoringCopies(TrueReg, MRI);
+ auto *FalseDef = getDefIgnoringCopies(FalseReg, MRI);
+
+ const LLT CondTy = MRI.getType(Cond);
+ const LLT OpTy = MRI.getType(TrueReg);
+ if (CondTy != OpTy || OpTy.getScalarSizeInBits() != 1)
+ return false;
+
+ // We have a boolean select.
+
+ // select Cond, Cond, F --> or Cond, F
+ // select Cond, 1, F --> or Cond, F
+ auto MaybeCstTrue = isConstantOrConstantSplatVector(*TrueDef, MRI);
+ if (Cond == TrueReg || (MaybeCstTrue && MaybeCstTrue->isOne())) {
+ MatchInfo = [=](MachineIRBuilder &MIB) {
+ MIB.buildOr(DstReg, Cond, FalseReg);
+ };
+ return true;
+ }
+
+ // select Cond, T, Cond --> and Cond, T
+ // select Cond, T, 0 --> and Cond, T
+ auto MaybeCstFalse = isConstantOrConstantSplatVector(*FalseDef, MRI);
+ if (Cond == FalseReg || (MaybeCstFalse && MaybeCstFalse->isZero())) {
+ MatchInfo = [=](MachineIRBuilder &MIB) {
+ MIB.buildAnd(DstReg, Cond, TrueReg);
+ };
+ return true;
+ }
+
+ // select Cond, T, 1 --> or (not Cond), T
+ if (MaybeCstFalse && MaybeCstFalse->isOne()) {
+ MatchInfo = [=](MachineIRBuilder &MIB) {
+ MIB.buildOr(DstReg, MIB.buildNot(OpTy, Cond), TrueReg);
+ };
+ return true;
+ }
+
+ // select Cond, 0, F --> and (not Cond), F
+ if (MaybeCstTrue && MaybeCstTrue->isZero()) {
+ MatchInfo = [=](MachineIRBuilder &MIB) {
+ MIB.buildAnd(DstReg, MIB.buildNot(OpTy, Cond), FalseReg);
+ };
+ return true;
+ }
+ return false;
+}
+
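The four rewrites are standard boolean identities; for 1-bit values they can be checked exhaustively (sel is a local helper, not LLVM API):

    #include <cassert>

    bool sel(bool C, bool T, bool F) { return C ? T : F; }

    int main() {
      for (bool c : {false, true})
        for (bool t : {false, true})
          for (bool f : {false, true}) {
            assert(sel(c, c, f) == (c | f));      // select c, c, F -> or c, F
            assert(sel(c, t, c) == (c & t));      // select c, T, c -> and c, T
            assert(sel(c, t, true) == (!c | t));  // select c, T, 1 -> or !c, T
            assert(sel(c, false, f) == (!c & f)); // select c, 0, F -> and !c, F
          }
    }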
+bool CombinerHelper::matchCombineFMinMaxNaN(MachineInstr &MI,
+ unsigned &IdxToPropagate) {
+ bool PropagateNaN;
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case TargetOpcode::G_FMINNUM:
+ case TargetOpcode::G_FMAXNUM:
+ PropagateNaN = false;
+ break;
+ case TargetOpcode::G_FMINIMUM:
+ case TargetOpcode::G_FMAXIMUM:
+ PropagateNaN = true;
+ break;
+ }
+
+ auto MatchNaN = [&](unsigned Idx) {
+ Register MaybeNaNReg = MI.getOperand(Idx).getReg();
+ const ConstantFP *MaybeCst = getConstantFPVRegVal(MaybeNaNReg, MRI);
+ if (!MaybeCst || !MaybeCst->getValueAPF().isNaN())
+ return false;
+ IdxToPropagate = PropagateNaN ? Idx : (Idx == 1 ? 2 : 1);
+ return true;
+ };
+
+ return MatchNaN(1) || MatchNaN(2);
+}
+
+bool CombinerHelper::matchAddSubSameReg(MachineInstr &MI, Register &Src) {
+ assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+
+ // Helper lambda to check for opportunities for
+ // A + (B - A) -> B
+ // (B - A) + A -> B
+ auto CheckFold = [&](Register MaybeSub, Register MaybeSameReg) {
+ Register Reg;
+ return mi_match(MaybeSub, MRI, m_GSub(m_Reg(Src), m_Reg(Reg))) &&
+ Reg == MaybeSameReg;
+ };
+ return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
+}
+
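The match encodes A + (B - A) == B, which holds for all fixed-width values under two's-complement wraparound; a quick unsigned check:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t A : {0u, 1u, 0xDEADBEEFu, UINT32_MAX})
        for (uint32_t B : {0u, 42u, UINT32_MAX}) {
          assert(A + (B - A) == B); // A + (B - A) -> B
          assert((B - A) + A == B); // (B - A) + A -> B
        }
    }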
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
index 64c2f0d5f8e4..4f03af0fce82 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -567,6 +567,26 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
Known = KnownBits::ashr(KnownBits::shl(Known, ShiftKnown), ShiftKnown);
break;
}
+ case TargetOpcode::G_UADDO:
+ case TargetOpcode::G_UADDE:
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_SADDE:
+ case TargetOpcode::G_USUBO:
+ case TargetOpcode::G_USUBE:
+ case TargetOpcode::G_SSUBO:
+ case TargetOpcode::G_SSUBE:
+ case TargetOpcode::G_UMULO:
+ case TargetOpcode::G_SMULO: {
+ if (MI.getOperand(1).getReg() == R) {
+ // If the target's booleans are zero-or-one, the top bits of the
+ // carry/overflow result are known to be zero.
+ if (TL.getBooleanContents(DstTy.isVector(), false) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ Known.Zero.setBitsFrom(1);
+ }
+ break;
+ }
}
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
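What the new case records, in miniature: a ZeroOrOne boolean is 0 or 1, so bits 1 and up are known zero. A uint32_t stand-in for Known.Zero.setBitsFrom(1):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t KnownZero = ~uint32_t(1);   // bits 1..31 known zero
      for (uint32_t Bool01 : {0u, 1u})
        assert((Bool01 & KnownZero) == 0); // no known-zero bit is ever set
    }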
@@ -673,6 +693,27 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
MI.getOperand(3).getReg(), DemandedElts,
Depth + 1);
}
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_SADDE:
+ case TargetOpcode::G_UADDO:
+ case TargetOpcode::G_UADDE:
+ case TargetOpcode::G_SSUBO:
+ case TargetOpcode::G_SSUBE:
+ case TargetOpcode::G_USUBO:
+ case TargetOpcode::G_USUBE:
+ case TargetOpcode::G_SMULO:
+ case TargetOpcode::G_UMULO: {
+ // If compares return 0/-1, all bits are sign bits.
+ // We know that we have an integer-based boolean since these operations
+ // are only available for integers.
+ if (MI.getOperand(1).getReg() == R) {
+ if (TL.getBooleanContents(DstTy.isVector(), false) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ return TyBits;
+ }
+
+ break;
+ }
case TargetOpcode::G_INTRINSIC:
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
default: {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
index 252b931602c6..efcc40641ea8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/InitializePasses.h"
-#include "llvm/PassRegistry.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 847df84afba6..a2af66d28f4a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -16,10 +16,11 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
+#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
@@ -47,7 +48,6 @@
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
@@ -78,7 +78,6 @@
#include "llvm/Transforms/Utils/MemoryOpRemark.h"
#include <algorithm>
#include <cassert>
-#include <cstddef>
#include <cstdint>
#include <iterator>
#include <string>
@@ -1818,7 +1817,7 @@ static unsigned getConstrainedOpcode(Intrinsic::ID ID) {
bool IRTranslator::translateConstrainedFPIntrinsic(
const ConstrainedFPIntrinsic &FPI, MachineIRBuilder &MIRBuilder) {
- fp::ExceptionBehavior EB = FPI.getExceptionBehavior().getValue();
+ fp::ExceptionBehavior EB = *FPI.getExceptionBehavior();
unsigned Opcode = getConstrainedOpcode(FPI.getIntrinsicID());
if (!Opcode)
@@ -2252,6 +2251,23 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
Info.OrigRet = {Register(), Type::getVoidTy(CI.getContext()), 0};
return CLI->lowerCall(MIRBuilder, Info);
}
+ case Intrinsic::fptrunc_round: {
+ unsigned Flags = MachineInstr::copyFlagsFromInstruction(CI);
+
+ // Convert the metadata argument to a constant integer
+ Metadata *MD = cast<MetadataAsValue>(CI.getArgOperand(1))->getMetadata();
+ Optional<RoundingMode> RoundMode =
+ convertStrToRoundingMode(cast<MDString>(MD)->getString());
+
+ // Add the Rounding mode as an integer
+ MIRBuilder
+ .buildInstr(TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND,
+ {getOrCreateVReg(CI)},
+ {getOrCreateVReg(*CI.getArgOperand(0))}, Flags)
+ .addImm((int)*RoundMode);
+
+ return true;
+ }
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
@@ -2409,7 +2425,7 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
TargetLowering::IntrinsicInfo Info;
// TODO: Add a GlobalISel version of getTgtMemIntrinsic.
if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) {
- Align Alignment = Info.align.getValueOr(
+ Align Alignment = Info.align.value_or(
DL->getABITypeAlign(Info.memVT.getTypeForEVT(F->getContext())));
LLT MemTy = Info.memVT.isSimple()
? getLLTForMVT(Info.memVT.getSimpleVT())
@@ -2934,15 +2950,6 @@ void IRTranslator::finishPendingPhis() {
}
}
-bool IRTranslator::valueIsSplit(const Value &V,
- SmallVectorImpl<uint64_t> *Offsets) {
- SmallVector<LLT, 4> SplitTys;
- if (Offsets && !Offsets->empty())
- Offsets->clear();
- computeValueLLTs(*DL, *V.getType(), SplitTys, Offsets);
- return SplitTys.size() > 1;
-}
-
bool IRTranslator::translate(const Instruction &Inst) {
CurBuilder->setDebugLoc(Inst.getDebugLoc());
@@ -2984,7 +2991,7 @@ bool IRTranslator::translate(const Constant &C, Register Reg) {
// Return the scalar if it is a <1 x Ty> vector.
unsigned NumElts = CAZ->getElementCount().getFixedValue();
if (NumElts == 1)
- return translateCopy(C, *CAZ->getElementValue(0u), *EntryBuilder.get());
+ return translateCopy(C, *CAZ->getElementValue(0u), *EntryBuilder);
SmallVector<Register, 4> Ops;
for (unsigned I = 0; I < NumElts; ++I) {
Constant &Elt = *CAZ->getElementValue(I);
@@ -2994,8 +3001,7 @@ bool IRTranslator::translate(const Constant &C, Register Reg) {
} else if (auto CV = dyn_cast<ConstantDataVector>(&C)) {
// Return the scalar if it is a <1 x Ty> vector.
if (CV->getNumElements() == 1)
- return translateCopy(C, *CV->getElementAsConstant(0),
- *EntryBuilder.get());
+ return translateCopy(C, *CV->getElementAsConstant(0), *EntryBuilder);
SmallVector<Register, 4> Ops;
for (unsigned i = 0; i < CV->getNumElements(); ++i) {
Constant &Elt = *CV->getElementAsConstant(i);
@@ -3013,7 +3019,7 @@ bool IRTranslator::translate(const Constant &C, Register Reg) {
}
} else if (auto CV = dyn_cast<ConstantVector>(&C)) {
if (CV->getNumOperands() == 1)
- return translateCopy(C, *CV->getOperand(0), *EntryBuilder.get());
+ return translateCopy(C, *CV->getOperand(0), *EntryBuilder);
SmallVector<Register, 4> Ops;
for (unsigned i = 0; i < CV->getNumOperands(); ++i) {
Ops.push_back(getOrCreateVReg(*CV->getOperand(i)));
@@ -3255,14 +3261,13 @@ bool IRTranslator::emitSPDescriptorFailure(StackProtectorDescriptor &SPD,
return false;
}
- // On PS4, the "return address" must still be within the calling function,
- // even if it's at the very end, so emit an explicit TRAP here.
- // Passing 'true' for doesNotReturn above won't generate the trap for us.
+ // On PS4/PS5, the "return address" must still be within the calling
+ // function, even if it's at the very end, so emit an explicit TRAP here.
// WebAssembly needs an unreachable instruction after a non-returning call,
// because the function return type can be different from __stack_chk_fail's
// return type (void).
const TargetMachine &TM = MF->getTarget();
- if (TM.getTargetTriple().isPS4CPU() || TM.getTargetTriple().isWasm()) {
+ if (TM.getTargetTriple().isPS() || TM.getTargetTriple().isWasm()) {
LLVM_DEBUG(dbgs() << "Unhandled trap emission for stack protector fail\n");
return false;
}
@@ -3413,7 +3418,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
}
}
- if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs, FuncInfo)) {
+ if (!CLI->lowerFormalArguments(*EntryBuilder, F, VRegArgs, FuncInfo)) {
OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
F.getSubprogram(), &F.getEntryBlock());
R << "unable to lower arguments: " << ore::NV("Prototype", F.getType());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
index e5f95ca5aa73..95ae8383b6fa 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -12,15 +12,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
-#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#define DEBUG_TYPE "inline-asm-lowering"
@@ -150,6 +145,7 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
case TargetLowering::C_RegisterClass:
return 2;
case TargetLowering::C_Memory:
+ case TargetLowering::C_Address:
return 3;
}
llvm_unreachable("Invalid constraint type");
@@ -310,7 +306,7 @@ bool InlineAsmLowering::lowerInlineAsm(
// If this is an indirect operand, the operand is a pointer to the
// accessed type.
if (OpInfo.isIndirect) {
- OpTy = Call.getAttributes().getParamElementType(ArgNo);
+ OpTy = Call.getParamElementType(ArgNo);
assert(OpTy && "Indirect operand must have elementtype attribute");
}
@@ -649,6 +645,8 @@ bool InlineAsmLowering::lowerInlineAsm(
return false;
case TargetLowering::C_Memory:
break; // Already handled.
+ case TargetLowering::C_Address:
+ break; // Silence warning.
case TargetLowering::C_Unknown:
LLVM_DEBUG(dbgs() << "Unexpected unknown constraint\n");
return false;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index 2bb5addefe48..28f3b425c67d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -12,8 +12,6 @@
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/ScopeExit.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
@@ -23,14 +21,13 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/config.h"
-#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/CodeGenCoverage.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 1d0c106fd5db..8959d215ecd1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -13,16 +13,9 @@
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cassert>
#define DEBUG_TYPE "instructionselector"
@@ -66,6 +59,10 @@ bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI,
std::next(MI.getIterator()) == IntoMI.getIterator())
return true;
+ // Convergent instructions cannot be moved in the CFG.
+ if (MI.isConvergent() && MI.getParent() != IntoMI.getParent())
+ return false;
+
return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() &&
!MI.hasUnmodeledSideEffects() && MI.implicit_operands().empty();
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
index 1f0738a8d9d2..54a82cac95d5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -188,6 +188,13 @@ LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) {
};
}
+LegalityPredicate LegalityPredicates::memSizeNotByteSizePow2(unsigned MMOIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT MemTy = Query.MMODescrs[MMOIdx].MemoryTy;
+ return !MemTy.isByteSized() || !isPowerOf2_32(MemTy.getSizeInBytes());
+ };
+}
+
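The predicate in plain C++, with LLT reduced to a bit count for illustration:

    #include <cassert>

    bool memSizeNotByteSizePow2(unsigned SizeInBits) {
      bool ByteSized = SizeInBits % 8 == 0;
      unsigned Bytes = SizeInBits / 8;
      bool BytePow2 = Bytes != 0 && (Bytes & (Bytes - 1)) == 0;
      return !ByteSized || !BytePow2;
    }

    int main() {
      assert(!memSizeNotByteSizePow2(32)); // 4 bytes: OK
      assert(memSizeNotByteSizePow2(24));  // 3 bytes: not a power of two
      assert(memSizeNotByteSizePow2(12));  // not byte-sized
    }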
LegalityPredicate LegalityPredicates::numElementsNotPow2(unsigned TypeIdx) {
return [=](const LegalityQuery &Query) {
const LLT QueryTy = Query.Types[TypeIdx];
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
index 75b7fcb5663a..25c1db91b05d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
@@ -43,6 +43,27 @@ LegalizeMutation LegalizeMutations::changeElementTo(unsigned TypeIdx,
};
}
+LegalizeMutation LegalizeMutations::changeElementCountTo(unsigned TypeIdx,
+ unsigned FromTypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT OldTy = Query.Types[TypeIdx];
+ const LLT NewTy = Query.Types[FromTypeIdx];
+ ElementCount NewEltCount =
+ NewTy.isVector() ? NewTy.getElementCount() : ElementCount::getFixed(1);
+ return std::make_pair(TypeIdx, OldTy.changeElementCount(NewEltCount));
+ };
+}
+
+LegalizeMutation LegalizeMutations::changeElementCountTo(unsigned TypeIdx,
+ LLT NewEltTy) {
+ return [=](const LegalityQuery &Query) {
+ const LLT OldTy = Query.Types[TypeIdx];
+ ElementCount NewEltCount = NewEltTy.isVector() ? NewEltTy.getElementCount()
+ : ElementCount::getFixed(1);
+ return std::make_pair(TypeIdx, OldTy.changeElementCount(NewEltCount));
+ };
+}
+
LegalizeMutation LegalizeMutations::changeElementSizeTo(unsigned TypeIdx,
unsigned FromTypeIdx) {
return [=](const LegalityQuery &Query) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
index 0ab4a7f64840..f09e5b7ce783 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -14,7 +14,7 @@
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
@@ -24,15 +24,11 @@
#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
-#include "llvm/Target/TargetMachine.h"
-
-#include <iterator>
#define DEBUG_TYPE "legalizer"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 37bc8a65dc7c..fb046d519ac8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -15,10 +15,13 @@
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -1611,40 +1614,6 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
return Legalized;
}
-Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) {
- Register WideReg = MRI.createGenericVirtualRegister(WideTy);
- LLT OrigTy = MRI.getType(OrigReg);
- LLT LCMTy = getLCMType(WideTy, OrigTy);
-
- const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits();
- const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits();
-
- Register UnmergeSrc = WideReg;
-
- // Create a merge to the LCM type, padding with undef
- // %0:_(<3 x s32>) = G_FOO => <4 x s32>
- // =>
- // %1:_(<4 x s32>) = G_FOO
- // %2:_(<4 x s32>) = G_IMPLICIT_DEF
- // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2
- // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3
- if (NumMergeParts > 1) {
- Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0);
- SmallVector<Register, 8> MergeParts(NumMergeParts, Undef);
- MergeParts[0] = WideReg;
- UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0);
- }
-
- // Unmerge to the original register and pad with dead defs.
- SmallVector<Register, 8> UnmergeResults(NumUnmergeParts);
- UnmergeResults[0] = OrigReg;
- for (int I = 1; I != NumUnmergeParts; ++I)
- UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy);
-
- MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc);
- return WideReg;
-}
-
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
LLT WideTy) {
@@ -1867,9 +1836,6 @@ LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
LLT WideTy) {
- if (TypeIdx == 1)
- return UnableToLegalize; // TODO
-
unsigned Opcode;
unsigned ExtOpcode;
Optional<Register> CarryIn = None;
@@ -1914,6 +1880,18 @@ LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
break;
}
+ if (TypeIdx == 1) {
+ unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
+
+ Observer.changingInstr(MI);
+ widenScalarDst(MI, WideTy, 1);
+ if (CarryIn)
+ widenScalarSrc(MI, WideTy, 4, BoolExtOp);
+
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
// Do the arithmetic in the larger type.
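The surrounding function's strategy, reduced to scalars: extend the operands, do the arithmetic in the wide type, and the narrow op overflowed iff truncating the wide result and re-extending it changes the value. Sketch for signed 8-bit addition widened to 32 bits (names local to the example):

    #include <cassert>
    #include <cstdint>

    bool addOverflowsS8(int8_t A, int8_t B) {
      int32_t Wide = int32_t(A) + int32_t(B); // arithmetic in the wider type
      return int32_t(int8_t(Wide)) != Wide;   // trunc + sext round-trip test
    }

    int main() {
      assert(addOverflowsS8(100, 100)); // 200 > INT8_MAX
      assert(!addOverflowsS8(-50, 20));
    }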
@@ -1985,8 +1963,12 @@ LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
LLT WideTy) {
- if (TypeIdx == 1)
- return UnableToLegalize;
+ if (TypeIdx == 1) {
+ Observer.changingInstr(MI);
+ widenScalarDst(MI, WideTy, 1);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
Register Result = MI.getOperand(0).getReg();
@@ -2992,7 +2974,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
if (isa<GSExtLoad>(LoadMI)) {
auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
- } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == DstTy) {
+ } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
// The extra bits are guaranteed to be zero, since we stored them that
// way. A zext load from Wide thus automatically gives zext from MemVT.
@@ -3314,7 +3296,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
Observer.changingInstr(MI);
const auto &TII = MIRBuilder.getTII();
MI.setDesc(TII.get(TargetOpcode::G_MUL));
- MI.RemoveOperand(1);
+ MI.removeOperand(1);
Observer.changedInstr(MI);
auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
@@ -4096,13 +4078,14 @@ LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
// is a load, return the new registers in ValRegs. For a store, each element
// of ValRegs should be PartTy. Returns the next offset that needs to be
// handled.
+ bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
auto MMO = LdStMI.getMMO();
auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
- unsigned Offset) -> unsigned {
+ unsigned NumParts, unsigned Offset) -> unsigned {
MachineFunction &MF = MIRBuilder.getMF();
unsigned PartSize = PartTy.getSizeInBits();
for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
- Offset += PartSize, ++Idx) {
+ ++Idx) {
unsigned ByteOffset = Offset / 8;
Register NewAddrReg;
@@ -4118,16 +4101,19 @@ LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
} else {
MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
}
+ Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
}
return Offset;
};
- unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);
+ unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
+ unsigned HandledOffset =
+ splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
// Handle the rest of the register if this isn't an even type breakdown.
if (LeftoverTy.isValid())
- splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);
+ splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
if (IsLoad) {
insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
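The new offset walk, extracted into a standalone loop: little-endian starts at bit offset 0 and steps forward, big-endian starts at the top and steps backward so part 0 still carries the most significant piece (constants are demo values):

    #include <cstdio>

    int main() {
      const unsigned TotalSize = 128, PartSize = 32; // bits
      for (bool isBigEndian : {false, true}) {
        unsigned Offset = isBigEndian ? TotalSize - PartSize : 0;
        std::printf("%s-endian offsets:", isBigEndian ? "big" : "little");
        for (unsigned Idx = 0; Idx < TotalSize / PartSize; ++Idx) {
          std::printf(" %u", Offset);
          Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
        }
        std::printf("\n"); // little: 0 32 64 96; big: 96 64 32 0
      }
    }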
@@ -4236,6 +4222,14 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_INTTOPTR:
case G_PTRTOINT:
case G_ADDRSPACE_CAST:
+ case G_UADDO:
+ case G_USUBO:
+ case G_UADDE:
+ case G_USUBE:
+ case G_SADDO:
+ case G_SSUBO:
+ case G_SADDE:
+ case G_SSUBE:
return fewerElementsVectorMultiEltType(GMI, NumElts);
case G_ICMP:
case G_FCMP:
@@ -4882,10 +4876,26 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
moreElementsVectorDst(MI, MoreTy, 0);
Observer.changedInstr(MI);
return Legalized;
- case TargetOpcode::G_SELECT:
- if (TypeIdx != 0)
- return UnableToLegalize;
- if (MRI.getType(MI.getOperand(1).getReg()).isVector())
+ case TargetOpcode::G_SELECT: {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register CondReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT CondTy = MRI.getType(CondReg);
+ if (TypeIdx == 1) {
+ if (!CondTy.isScalar() ||
+ DstTy.getElementCount() != MoreTy.getElementCount())
+ return UnableToLegalize;
+
+ // This is turning a scalar select of vectors into a vector
+ // select. Broadcast the select condition.
+ auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(ShufSplat.getReg(0));
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ if (CondTy.isVector())
return UnableToLegalize;
Observer.changingInstr(MI);
@@ -4894,6 +4904,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
moreElementsVectorDst(MI, MoreTy, 0);
Observer.changedInstr(MI);
return Legalized;
+ }
case TargetOpcode::G_UNMERGE_VALUES:
return UnableToLegalize;
case TargetOpcode::G_PHI:
@@ -7229,25 +7240,32 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
Register Op2Reg = MI.getOperand(3).getReg();
LLT DstTy = MRI.getType(DstReg);
LLT MaskTy = MRI.getType(MaskReg);
- LLT Op1Ty = MRI.getType(Op1Reg);
if (!DstTy.isVector())
return UnableToLegalize;
- // Vector selects can have a scalar predicate. If so, splat into a vector and
- // finish for later legalization attempts to try again.
if (MaskTy.isScalar()) {
+ // Turn the scalar condition into a vector condition mask.
+
Register MaskElt = MaskReg;
- if (MaskTy.getSizeInBits() < DstTy.getScalarSizeInBits())
- MaskElt = MIRBuilder.buildSExt(DstTy.getElementType(), MaskElt).getReg(0);
- // Generate a vector splat idiom to be pattern matched later.
+
+ // The condition was potentially zero extended before, but we want a sign
+ // extended boolean.
+ if (MaskTy.getSizeInBits() <= DstTy.getScalarSizeInBits() &&
+ MaskTy != LLT::scalar(1)) {
+ MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
+ }
+
+ // Continue the sign extension (or truncate) to match the data type.
+ MaskElt = MIRBuilder.buildSExtOrTrunc(DstTy.getElementType(),
+ MaskElt).getReg(0);
+
+ // Generate a vector splat idiom.
auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
- Observer.changingInstr(MI);
- MI.getOperand(1).setReg(ShufSplat.getReg(0));
- Observer.changedInstr(MI);
- return Legalized;
+ MaskReg = ShufSplat.getReg(0);
+ MaskTy = DstTy;
}
- if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits()) {
+ if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
return UnableToLegalize;
}
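Why the mask is sign-extended: an and/or-based vector select needs each lane mask to be all ones or all zeros, so bit 0 of the boolean must be propagated upward. The usual sext-in-reg idiom on 32 bits (arithmetic right shift of a negative value is the common, though implementation-defined, behavior):

    #include <cassert>
    #include <cstdint>

    int32_t sextBit0(uint32_t B) { return (int32_t)(B << 31) >> 31; }

    int main() {
      assert(sextBit0(1) == -1); // all ones: keeps the "true" operand
      assert(sextBit0(0) == 0);  // all zeros: keeps the "false" operand
    }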
@@ -7414,7 +7432,7 @@ static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
unsigned NumBits = Ty.getScalarSizeInBits();
auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
if (!Ty.isVector() && ValVRegAndVal) {
- APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8);
+ APInt Scalar = ValVRegAndVal->Value.trunc(8);
APInt SplatVal = APInt::getSplat(NumBits, Scalar);
return MIB.buildConstant(Ty, SplatVal).getReg(0);
}
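What the trunc-then-splat computes for a constant memset byte, as a plain uint64_t version of APInt::getSplat:

    #include <cassert>
    #include <cstdint>

    uint64_t splatByte(uint8_t Byte, unsigned NumBits) {
      uint64_t V = 0;
      for (unsigned I = 0; I < NumBits; I += 8)
        V |= uint64_t(Byte) << I; // replicate the byte across the width
      return V;
    }

    int main() { assert(splatByte(0xAB, 64) == 0xABABABABABABABABull); }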
@@ -7569,7 +7587,7 @@ LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
// See if this is a constant length copy
auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
// FIXME: support dynamically sized G_MEMCPY_INLINE
- assert(LenVRegAndVal.hasValue() &&
+ assert(LenVRegAndVal &&
"inline memcpy with dynamic size is not yet supported");
uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
if (KnownLen == 0) {
@@ -7609,7 +7627,7 @@ LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
bool DstAlignCanChange = false;
MachineFrameInfo &MFI = MF.getFrameInfo();
- Align Alignment = commonAlignment(DstAlign, SrcAlign);
+ Align Alignment = std::min(DstAlign, SrcAlign);
MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
@@ -7644,7 +7662,7 @@ LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!TRI->hasStackRealignment(MF))
while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
- NewAlign = NewAlign / 2;
+ NewAlign = NewAlign.previous();
if (NewAlign > Alignment) {
Alignment = NewAlign;
@@ -7717,7 +7735,7 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
bool DstAlignCanChange = false;
MachineFrameInfo &MFI = MF.getFrameInfo();
bool OptSize = shouldLowerMemFuncForSize(MF);
- Align Alignment = commonAlignment(DstAlign, SrcAlign);
+ Align Alignment = std::min(DstAlign, SrcAlign);
MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
@@ -7752,7 +7770,7 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!TRI->hasStackRealignment(MF))
while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
- NewAlign = NewAlign / 2;
+ NewAlign = NewAlign.previous();
if (NewAlign > Alignment) {
Alignment = NewAlign;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index 30697913a6a4..6adb7ddb5b66 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -13,7 +13,6 @@
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/ADT/SmallBitVector.h"
-#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -23,9 +22,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LowLevelTypeImpl.h"
-#include "llvm/Support/MathExtras.h"
#include <algorithm>
-#include <map>
using namespace llvm;
using namespace LegalizeActions;
@@ -132,15 +129,16 @@ static bool mutationIsSane(const LegalizeRule &Rule,
LLVM_FALLTHROUGH;
case MoreElements: {
// MoreElements can go from scalar to vector.
- const unsigned OldElts = OldTy.isVector() ? OldTy.getNumElements() : 1;
+ const ElementCount OldElts = OldTy.isVector() ?
+ OldTy.getElementCount() : ElementCount::getFixed(1);
if (NewTy.isVector()) {
if (Rule.getAction() == FewerElements) {
// Make sure the element count really decreased.
- if (NewTy.getNumElements() >= OldElts)
+ if (ElementCount::isKnownGE(NewTy.getElementCount(), OldElts))
return false;
} else {
// Make sure the element count really increased.
- if (NewTy.getNumElements() <= OldElts)
+ if (ElementCount::isKnownLE(NewTy.getElementCount(), OldElts))
return false;
}
} else if (Rule.getAction() == MoreElements)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
index de8dbd456901..d4fbf7d15089 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
@@ -73,6 +73,7 @@ void LoadStoreOpt::init(MachineFunction &MF) {
void LoadStoreOpt::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AAResultsWrapperPass>();
+ AU.setPreservesAll();
getSelectionDAGFallbackAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -508,6 +509,12 @@ bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI,
if (StoreMI.getMemSizeInBits() != ValueTy.getSizeInBits())
return false;
+ // Avoid adding volatile or ordered stores to the candidate. We already have a
+ // check for this in instMayAlias() but that only gets called later between
+ // potential aliasing hazards.
+ if (!StoreMI.isSimple())
+ return false;
+
Register StoreAddr = StoreMI.getPointerReg();
auto BIO = getPointerInfo(StoreAddr, *MRI);
Register StoreBase = BIO.BaseReg;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
index 328a278f3d68..c1287693e74d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -13,6 +13,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/InitializePasses.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index c6720568b362..19ebf46191a9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -9,8 +9,6 @@
/// This file implements the MachineIRBuidler class.
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/Analysis/MemoryLocation.h"
-#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -19,7 +17,7 @@
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
using namespace llvm;
@@ -568,47 +566,6 @@ MachineInstrBuilder MachineIRBuilder::buildExtract(const DstOp &Dst,
return Extract;
}
-void MachineIRBuilder::buildSequence(Register Res, ArrayRef<Register> Ops,
- ArrayRef<uint64_t> Indices) {
-#ifndef NDEBUG
- assert(Ops.size() == Indices.size() && "incompatible args");
- assert(!Ops.empty() && "invalid trivial sequence");
- assert(llvm::is_sorted(Indices) &&
- "sequence offsets must be in ascending order");
-
- assert(getMRI()->getType(Res).isValid() && "invalid operand type");
- for (auto Op : Ops)
- assert(getMRI()->getType(Op).isValid() && "invalid operand type");
-#endif
-
- LLT ResTy = getMRI()->getType(Res);
- LLT OpTy = getMRI()->getType(Ops[0]);
- unsigned OpSize = OpTy.getSizeInBits();
- bool MaybeMerge = true;
- for (unsigned i = 0; i < Ops.size(); ++i) {
- if (getMRI()->getType(Ops[i]) != OpTy || Indices[i] != i * OpSize) {
- MaybeMerge = false;
- break;
- }
- }
-
- if (MaybeMerge && Ops.size() * OpSize == ResTy.getSizeInBits()) {
- buildMerge(Res, Ops);
- return;
- }
-
- Register ResIn = getMRI()->createGenericVirtualRegister(ResTy);
- buildUndef(ResIn);
-
- for (unsigned i = 0; i < Ops.size(); ++i) {
- Register ResOut = i + 1 == Ops.size()
- ? Res
- : getMRI()->createGenericVirtualRegister(ResTy);
- buildInsert(ResOut, ResIn, Ops[i], Indices[i]);
- ResIn = ResOut;
- }
-}
-
MachineInstrBuilder MachineIRBuilder::buildUndef(const DstOp &Res) {
return buildInstr(TargetOpcode::G_IMPLICIT_DEF, {Res}, {});
}
@@ -666,6 +623,17 @@ MachineInstrBuilder MachineIRBuilder::buildBuildVector(const DstOp &Res,
return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec);
}
+MachineInstrBuilder
+MachineIRBuilder::buildBuildVectorConstant(const DstOp &Res,
+ ArrayRef<APInt> Ops) {
+ SmallVector<SrcOp> TmpVec;
+ TmpVec.reserve(Ops.size());
+ LLT EltTy = Res.getLLTTy(*getMRI()).getElementType();
+ for (auto &Op : Ops)
+ TmpVec.push_back(buildConstant(EltTy, Op));
+ return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec);
+}
+
MachineInstrBuilder MachineIRBuilder::buildSplatVector(const DstOp &Res,
const SrcOp &Src) {
SmallVector<SrcOp, 8> TmpVec(Res.getLLTTy(*getMRI()).getNumElements(), Src);
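A possible usage sketch for the new buildBuildVectorConstant() helper, assuming a MachineIRBuilder named MIRBuilder already positioned in a function (types and values invented): each APInt becomes a G_CONSTANT feeding a single G_BUILD_VECTOR.

    // Materialize the constant vector <2 x s32> <7, 9> in one call.
    LLT V2S32 = LLT::fixed_vector(2, LLT::scalar(32));
    APInt Elts[] = {APInt(32, 7), APInt(32, 9)};
    auto Vec = MIRBuilder.buildBuildVectorConstant(V2S32, Elts);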
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index 01af6bb51bb7..bce850ee212c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -14,8 +14,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -25,12 +23,13 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterBank.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -631,7 +630,8 @@ bool RegBankSelect::assignInstr(MachineInstr &MI) {
"Unexpected hint opcode!");
// The only correct mapping for these is to always use the source register
// bank.
- const RegisterBank *RB = MRI->getRegBankOrNull(MI.getOperand(1).getReg());
+ const RegisterBank *RB =
+ RBI->getRegBank(MI.getOperand(1).getReg(), *MRI, *TRI);
// We can assume every instruction above this one has a selected register
// bank.
assert(RB && "Expected source register to have a register bank?");
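The switch from MRI->getRegBankOrNull() to RBI->getRegBank() matters when the source operand was constrained to a register class rather than assigned a bank directly; a sketch of the assumed difference, using only the public API (helper name invented):

    // Hypothetical check: RBI.getRegBank() can derive a bank from an
    // already-assigned register class, where getRegBankOrNull() would
    // return null.
    static void checkSourceBank(Register Src, const MachineRegisterInfo &MRI,
                                const TargetRegisterInfo &TRI,
                                const RegisterBankInfo &RBI) {
      const RegisterBank *RB = RBI.getRegBank(Src, MRI, TRI);
      assert(RB && "source of a hint instruction should map to a bank");
      (void)RB;
    }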
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 544af9a2954f..7781761bc131 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -16,14 +16,14 @@
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
-#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
-#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSizeOpts.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -31,6 +31,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
#define DEBUG_TYPE "globalisel-utils"
@@ -56,6 +57,11 @@ Register llvm::constrainOperandRegClass(
// Assume physical registers are properly constrained.
assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented");
+ // Save the old register class to check whether
+ // the change notifications will be required.
+ // TODO: A better approach would be to pass
+ // the observers to constrainRegToClass().
+ auto *OldRegClass = MRI.getRegClassOrNull(Reg);
Register ConstrainedReg = constrainRegToClass(MRI, TII, RBI, Reg, RegClass);
// If we created a new virtual register because the class is not compatible
// then create a copy between the new and the old register.
@@ -81,7 +87,7 @@ Register llvm::constrainOperandRegClass(
if (GISelChangeObserver *Observer = MF.getObserver()) {
Observer->changedInstr(*RegMO.getParent());
}
- } else {
+ } else if (OldRegClass != MRI.getRegClassOrNull(Reg)) {
if (GISelChangeObserver *Observer = MF.getObserver()) {
if (!RegMO.isDef()) {
MachineInstr *RegDef = MRI.getVRegDef(Reg);
@@ -500,6 +506,7 @@ Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const Register Op1,
default:
break;
case TargetOpcode::G_ADD:
+ case TargetOpcode::G_PTR_ADD:
return C1 + C2;
case TargetOpcode::G_AND:
return C1 & C2;
@@ -533,6 +540,14 @@ Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const Register Op1,
if (!C2.getBoolValue())
break;
return C1.srem(C2);
+ case TargetOpcode::G_SMIN:
+ return APIntOps::smin(C1, C2);
+ case TargetOpcode::G_SMAX:
+ return APIntOps::smax(C1, C2);
+ case TargetOpcode::G_UMIN:
+ return APIntOps::umin(C1, C2);
+ case TargetOpcode::G_UMAX:
+ return APIntOps::umax(C1, C2);
}
return None;
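A worked example of the new min/max folds on 8-bit operands; the signed and unsigned variants diverge once the sign bit is set:

    APInt A(8, 0x80), B(8, 0x01);      // 0x80 is -128 signed, 128 unsigned
    APInt SMin = APIntOps::smin(A, B); // 0x80: -128 is the signed minimum
    APInt UMin = APIntOps::umin(A, B); // 0x01: 1 < 128 unsigned
    APInt SMax = APIntOps::smax(A, B); // 0x01
    APInt UMax = APIntOps::umax(A, B); // 0x80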
@@ -592,33 +607,27 @@ Optional<APFloat> llvm::ConstantFoldFPBinOp(unsigned Opcode, const Register Op1,
return None;
}
-Register llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1,
- const Register Op2,
- const MachineRegisterInfo &MRI,
- MachineIRBuilder &MIB) {
+SmallVector<APInt>
+llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1,
+ const Register Op2,
+ const MachineRegisterInfo &MRI) {
auto *SrcVec2 = getOpcodeDef<GBuildVector>(Op2, MRI);
if (!SrcVec2)
- return Register();
+ return SmallVector<APInt>();
auto *SrcVec1 = getOpcodeDef<GBuildVector>(Op1, MRI);
if (!SrcVec1)
- return Register();
+ return SmallVector<APInt>();
- const LLT EltTy = MRI.getType(SrcVec1->getSourceReg(0));
-
- SmallVector<Register, 16> FoldedElements;
+ SmallVector<APInt> FoldedElements;
for (unsigned Idx = 0, E = SrcVec1->getNumSources(); Idx < E; ++Idx) {
auto MaybeCst = ConstantFoldBinOp(Opcode, SrcVec1->getSourceReg(Idx),
SrcVec2->getSourceReg(Idx), MRI);
if (!MaybeCst)
- return Register();
- auto FoldedCstReg = MIB.buildConstant(EltTy, *MaybeCst).getReg(0);
- FoldedElements.emplace_back(FoldedCstReg);
+ return SmallVector<APInt>();
+ FoldedElements.push_back(*MaybeCst);
}
- // Create the new vector constant.
- auto CstVec =
- MIB.buildBuildVector(MRI.getType(SrcVec1->getReg(0)), FoldedElements);
- return CstVec.getReg(0);
+ return FoldedElements;
}
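With the MachineIRBuilder parameter gone, a caller is now expected to fold first and emit instructions only on success; a hypothetical pairing with the new buildBuildVectorConstant() (wrapper name invented):

    static bool tryFoldVectorBinop(unsigned Opc, Register Dst, Register Op1,
                                   Register Op2,
                                   const MachineRegisterInfo &MRI,
                                   MachineIRBuilder &MIB) {
      SmallVector<APInt> Lanes = ConstantFoldVectorBinop(Opc, Op1, Op2, MRI);
      if (Lanes.empty())            // empty result signals "could not fold"
        return false;
      MIB.buildBuildVectorConstant(Dst, Lanes);
      return true;
    }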
bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
@@ -1061,15 +1070,38 @@ bool llvm::isBuildVectorConstantSplat(const MachineInstr &MI,
AllowUndef);
}
+Optional<APInt> llvm::getIConstantSplatVal(const Register Reg,
+ const MachineRegisterInfo &MRI) {
+ if (auto SplatValAndReg =
+ getAnyConstantSplat(Reg, MRI, /* AllowUndef */ false)) {
+ Optional<ValueAndVReg> ValAndVReg =
+ getIConstantVRegValWithLookThrough(SplatValAndReg->VReg, MRI);
+ return ValAndVReg->Value;
+ }
+
+ return None;
+}
+
+Optional<APInt> llvm::getIConstantSplatVal(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ return getIConstantSplatVal(MI.getOperand(0).getReg(), MRI);
+}
+
Optional<int64_t>
-llvm::getBuildVectorConstantSplat(const MachineInstr &MI,
- const MachineRegisterInfo &MRI) {
+llvm::getIConstantSplatSExtVal(const Register Reg,
+ const MachineRegisterInfo &MRI) {
if (auto SplatValAndReg =
- getAnyConstantSplat(MI.getOperand(0).getReg(), MRI, false))
+ getAnyConstantSplat(Reg, MRI, /* AllowUndef */ false))
return getIConstantVRegSExtVal(SplatValAndReg->VReg, MRI);
return None;
}
+Optional<int64_t>
+llvm::getIConstantSplatSExtVal(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ return getIConstantSplatSExtVal(MI.getOperand(0).getReg(), MRI);
+}
+
Optional<FPValueAndVReg> llvm::getFConstantSplat(Register VReg,
const MachineRegisterInfo &MRI,
bool AllowUndef) {
@@ -1095,7 +1127,7 @@ Optional<RegOrConstant> llvm::getVectorSplat(const MachineInstr &MI,
unsigned Opc = MI.getOpcode();
if (!isBuildVectorOp(Opc))
return None;
- if (auto Splat = getBuildVectorConstantSplat(MI, MRI))
+ if (auto Splat = getIConstantSplatSExtVal(MI, MRI))
return RegOrConstant(*Splat);
auto Reg = MI.getOperand(1).getReg();
if (any_of(make_range(MI.operands_begin() + 2, MI.operands_end()),
@@ -1104,6 +1136,26 @@ Optional<RegOrConstant> llvm::getVectorSplat(const MachineInstr &MI,
return RegOrConstant(Reg);
}
+static bool isConstantScalar(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ bool AllowFP = true,
+ bool AllowOpaqueConstants = true) {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_CONSTANT:
+ case TargetOpcode::G_IMPLICIT_DEF:
+ return true;
+ case TargetOpcode::G_FCONSTANT:
+ return AllowFP;
+ case TargetOpcode::G_GLOBAL_VALUE:
+ case TargetOpcode::G_FRAME_INDEX:
+ case TargetOpcode::G_BLOCK_ADDR:
+ case TargetOpcode::G_JUMP_TABLE:
+ return AllowOpaqueConstants;
+ default:
+ return false;
+ }
+}
+
bool llvm::isConstantOrConstantVector(MachineInstr &MI,
const MachineRegisterInfo &MRI) {
Register Def = MI.getOperand(0).getReg();
@@ -1121,19 +1173,71 @@ bool llvm::isConstantOrConstantVector(MachineInstr &MI,
return true;
}
+bool llvm::isConstantOrConstantVector(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ bool AllowFP, bool AllowOpaqueConstants) {
+ if (isConstantScalar(MI, MRI, AllowFP, AllowOpaqueConstants))
+ return true;
+
+ if (!isBuildVectorOp(MI.getOpcode()))
+ return false;
+
+ const unsigned NumOps = MI.getNumOperands();
+ for (unsigned I = 1; I != NumOps; ++I) {
+ const MachineInstr *ElementDef = MRI.getVRegDef(MI.getOperand(I).getReg());
+ if (!isConstantScalar(*ElementDef, MRI, AllowFP, AllowOpaqueConstants))
+ return false;
+ }
+
+ return true;
+}
+
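A hypothetical call site for the new overload, assuming Reg and MRI are in scope: reject FP constants while still treating opaque constants such as frame indices and globals as foldable.

    bool Foldable = isConstantOrConstantVector(
        *MRI.getVRegDef(Reg), MRI, /*AllowFP=*/false,
        /*AllowOpaqueConstants=*/true);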
Optional<APInt>
llvm::isConstantOrConstantSplatVector(MachineInstr &MI,
const MachineRegisterInfo &MRI) {
Register Def = MI.getOperand(0).getReg();
if (auto C = getIConstantVRegValWithLookThrough(Def, MRI))
return C->Value;
- auto MaybeCst = getBuildVectorConstantSplat(MI, MRI);
+ auto MaybeCst = getIConstantSplatSExtVal(MI, MRI);
if (!MaybeCst)
return None;
const unsigned ScalarSize = MRI.getType(Def).getScalarSizeInBits();
return APInt(ScalarSize, *MaybeCst, true);
}
+bool llvm::isNullOrNullSplat(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI, bool AllowUndefs) {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_IMPLICIT_DEF:
+ return AllowUndefs;
+ case TargetOpcode::G_CONSTANT:
+ return MI.getOperand(1).getCImm()->isNullValue();
+ case TargetOpcode::G_FCONSTANT: {
+ const ConstantFP *FPImm = MI.getOperand(1).getFPImm();
+ return FPImm->isZero() && !FPImm->isNegative();
+ }
+ default:
+ if (!AllowUndefs) // TODO: isBuildVectorAllZeros assumes undef is OK already
+ return false;
+ return isBuildVectorAllZeros(MI, MRI);
+ }
+}
+
+bool llvm::isAllOnesOrAllOnesSplat(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ bool AllowUndefs) {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_IMPLICIT_DEF:
+ return AllowUndefs;
+ case TargetOpcode::G_CONSTANT:
+ return MI.getOperand(1).getCImm()->isAllOnesValue();
+ default:
+ if (!AllowUndefs) // TODO: isBuildVectorAllOnes assumes undef is OK already
+ return false;
+ return isBuildVectorAllOnes(MI, MRI);
+ }
+}
+
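The floating-point case in isNullOrNullSplat() deliberately accepts only positive zero; a small sketch of why, assuming IEEE-754 single precision (helper name invented):

    static bool isPositiveZero(const ConstantFP &FPImm) {
      // +0.0f is the all-zero pattern (0x00000000); -0.0f sets only the
      // sign bit (0x80000000). The two compare equal under IEEE-754, but
      // only +0.0 is a bitwise null value, which is what splat matching
      // needs.
      return FPImm.isZero() && !FPImm.isNegative();
    }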
bool llvm::matchUnaryPredicate(
const MachineRegisterInfo &MRI, Register Reg,
std::function<bool(const Constant *ConstVal)> Match, bool AllowUndefs) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
index bbd9006a5d8c..f5833d3b9086 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -592,6 +592,13 @@ void GlobalMerge::setMustKeepGlobalVariables(Module &M) {
if (const GlobalVariable *GV =
dyn_cast<GlobalVariable>(U->stripPointerCasts()))
MustKeepGlobalVariables.insert(GV);
+ else if (const ConstantArray *CA = dyn_cast<ConstantArray>(U->stripPointerCasts())) {
+ for (const Use &Elt : CA->operands()) {
+ if (const GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(Elt->stripPointerCasts()))
+ MustKeepGlobalVariables.insert(GV);
+ }
+ }
}
}
}
@@ -609,6 +616,13 @@ bool GlobalMerge::doInitialization(Module &M) {
bool Changed = false;
setMustKeepGlobalVariables(M);
+ LLVM_DEBUG({
+ dbgs() << "Number of GVs that must be kept: "
+ << MustKeepGlobalVariables.size() << "\n";
+ for (const GlobalVariable *KeptGV : MustKeepGlobalVariables)
+ dbgs() << "Kept: " << *KeptGV << "\n";
+ });
// Grab all non-const globals.
for (auto &GV : M.globals()) {
// Merge is safe for "normal" internal or external globals only
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp b/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp
index 83b8c2d0eacb..67d6a3df7807 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp
@@ -23,10 +23,8 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
@@ -37,7 +35,6 @@
#include "llvm/PassRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp
index 1b20d1da20ad..105ab908d3fa 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp
@@ -21,6 +21,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MBFIWrapper.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
@@ -28,16 +29,13 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MBFIWrapper.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/IR/Attributes.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
index 2d38a44d5a33..5be98e114673 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
@@ -32,17 +32,13 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
index c975013db8c8..06c660807c5c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -23,7 +23,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveIntervalCalc.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveStacks.h"
@@ -686,7 +685,7 @@ void InlineSpiller::reMaterializeAll() {
// Remove any values that were completely rematted.
for (Register Reg : RegsToSpill) {
LiveInterval &LI = LIS.getInterval(Reg);
- for (VNInfo *VNI : llvm::make_range(LI.vni_begin(), LI.vni_end())) {
+ for (VNInfo *VNI : LI.vnis()) {
if (VNI->isUnused() || VNI->isPHIDef() || UsedValues.count(VNI))
continue;
MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
@@ -839,6 +838,13 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
unsigned Idx = OpPair.second;
assert(MI == OpPair.first && "Instruction conflict during operand folding");
MachineOperand &MO = MI->getOperand(Idx);
+
+ // No point restoring an undef read, and we'll produce an invalid live
+ // interval.
+ // TODO: Is this really the correct way to handle undef tied uses?
+ if (MO.isUse() && !MO.readsReg() && !MO.isTied())
+ continue;
+
if (MO.isImplicit()) {
ImpReg = MO.getReg();
continue;
@@ -964,7 +970,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
if (!MO.isReg() || !MO.isImplicit())
break;
if (MO.getReg() == ImpReg)
- FoldMI->RemoveOperand(i - 1);
+ FoldMI->removeOperand(i - 1);
}
LLVM_DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MIS.end(), LIS,
@@ -1608,7 +1614,7 @@ void HoistSpillHelper::hoistAllSpills() {
for (unsigned i = RMEnt->getNumOperands(); i; --i) {
MachineOperand &MO = RMEnt->getOperand(i - 1);
if (MO.isReg() && MO.isImplicit() && MO.isDef() && !MO.isDead())
- RMEnt->RemoveOperand(i - 1);
+ RMEnt->removeOperand(i - 1);
}
}
Edit.eliminateDeadDefs(SpillsToRm, None, AA);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.h b/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.h
index ace1691c1363..97464da9f17b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.h
@@ -37,7 +37,7 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache {
SlotIndex First;
SlotIndex Last;
- BlockInterference() {}
+ BlockInterference() = default;
};
/// Entry - A cache entry containing interference information for all aliases
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 5a20580e5479..b3f38a3b53f3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -46,6 +46,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -57,7 +58,6 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
index 230c6846dde2..43858071025a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -19,7 +19,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
@@ -31,9 +30,8 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -173,10 +171,10 @@ class Polynomial {
};
/// Number of Error Bits e
- unsigned ErrorMSBs;
+ unsigned ErrorMSBs = (unsigned)-1;
/// Value
- Value *V;
+ Value *V = nullptr;
/// Coefficient B
SmallVector<std::pair<BOps, APInt>, 4> B;
@@ -185,7 +183,7 @@ class Polynomial {
APInt A;
public:
- Polynomial(Value *V) : ErrorMSBs((unsigned)-1), V(V) {
+ Polynomial(Value *V) : V(V) {
IntegerType *Ty = dyn_cast<IntegerType>(V->getType());
if (Ty) {
ErrorMSBs = 0;
@@ -195,12 +193,12 @@ public:
}
Polynomial(const APInt &A, unsigned ErrorMSBs = 0)
- : ErrorMSBs(ErrorMSBs), V(nullptr), A(A) {}
+ : ErrorMSBs(ErrorMSBs), A(A) {}
Polynomial(unsigned BitWidth, uint64_t A, unsigned ErrorMSBs = 0)
- : ErrorMSBs(ErrorMSBs), V(nullptr), A(BitWidth, A) {}
+ : ErrorMSBs(ErrorMSBs), A(BitWidth, A) {}
- Polynomial() : ErrorMSBs((unsigned)-1), V(nullptr) {}
+ Polynomial() = default;
/// Increment and clamp the number of undefined bits.
void incErrorMSBs(unsigned amt) {
@@ -1206,9 +1204,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
->getNumElements();
FixedVectorType *ILTy = FixedVectorType::get(ETy, Factor * ElementsPerSVI);
- SmallVector<unsigned, 4> Indices;
- for (unsigned i = 0; i < Factor; i++)
- Indices.push_back(i);
+ auto Indices = llvm::to_vector<4>(llvm::seq<unsigned>(0, Factor));
InterleavedCost = TTI.getInterleavedMemoryOpCost(
Instruction::Load, ILTy, Factor, Indices, InsertionPoint->getAlign(),
InsertionPoint->getPointerAddressSpace(), CostKind);
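The llvm::seq/to_vector combination above materializes the half-open range [0, Factor); a standalone illustration:

    // With Factor == 4 this yields the SmallVector {0, 1, 2, 3},
    // replacing the manual push_back loop that was removed.
    SmallVector<unsigned, 4> Indices =
        llvm::to_vector<4>(llvm::seq<unsigned>(0, 4));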
@@ -1228,7 +1224,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
auto MSSAU = MemorySSAUpdater(&MSSA);
MemoryUse *MSSALoad = cast<MemoryUse>(MSSAU.createMemoryAccessBefore(
LI, nullptr, MSSA.getMemoryAccess(InsertionPoint)));
- MSSAU.insertUse(MSSALoad);
+ MSSAU.insertUse(MSSALoad, /*RenameUses=*/true);
// Create the final SVIs and replace all uses.
int i = 0;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp
new file mode 100644
index 000000000000..23220872b532
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp
@@ -0,0 +1,233 @@
+//===- JMCInstrumenter.cpp - JMC Instrumentation --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// JMCInstrumenter pass:
+// - instrument each function with a call to __CheckForDebuggerJustMyCode. The
+// sole argument should be defined in .msvcjmc. Each flag is 1 byte initilized
+// to 1.
+// - create the dummy COMDAT function __JustMyCode_Default to prevent linking
+// error if __CheckForDebuggerJustMyCode is not available.
+// - For MSVC:
+// add "/alternatename:__CheckForDebuggerJustMyCode=__JustMyCode_Default" to
+// "llvm.linker.options"
+// For ELF:
+// Rename __JustMyCode_Default to __CheckForDebuggerJustMyCode and mark it as
+// weak symbol.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/DJB.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "jmc-instrument"
+
+namespace {
+struct JMCInstrumenter : public ModulePass {
+ static char ID;
+ JMCInstrumenter() : ModulePass(ID) {
+ initializeJMCInstrumenterPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnModule(Module &M) override;
+};
+char JMCInstrumenter::ID = 0;
+} // namespace
+
+INITIALIZE_PASS(
+ JMCInstrumenter, DEBUG_TYPE,
+ "Instrument function entry with call to __CheckForDebuggerJustMyCode",
+ false, false)
+
+ModulePass *llvm::createJMCInstrumenterPass() { return new JMCInstrumenter(); }
+
+namespace {
+const char CheckFunctionName[] = "__CheckForDebuggerJustMyCode";
+
+std::string getFlagName(DISubprogram &SP, bool UseX86FastCall) {
+ // absolute windows path: windows_backslash
+ // relative windows backslash path: windows_backslash
+ // relative windows slash path: posix
+ // absolute posix path: posix
+ // relative posix path: posix
+ sys::path::Style PathStyle =
+ has_root_name(SP.getDirectory(), sys::path::Style::windows_backslash) ||
+ SP.getDirectory().contains("\\") ||
+ SP.getFilename().contains("\\")
+ ? sys::path::Style::windows_backslash
+ : sys::path::Style::posix;
+ // Best-effort path normalization. This is to guarantee a unique flag symbol
+ // is produced for the same directory. Some builds may want to use relative
+ // paths, or paths with a specific prefix (see the -fdebug-compilation-dir
+ // flag), so only hash paths in debuginfo. Don't expand them to absolute
+ // paths.
+ SmallString<256> FilePath(SP.getDirectory());
+ sys::path::append(FilePath, PathStyle, SP.getFilename());
+ sys::path::native(FilePath, PathStyle);
+ sys::path::remove_dots(FilePath, /*remove_dot_dot=*/true, PathStyle);
+
+ // The naming convention for the flag name is __<hash>_<file name> with '.' in
+ // <file name> replaced with '@'. For example C:\file.any.c would have a flag
+ // __D032E919_file@any@c. The naming convention matches MSVC's format;
+ // however, the match is not required to make JMC work. The hashing function
+ // used here is different from MSVC's.
+
+ std::string Suffix;
+ for (auto C : sys::path::filename(FilePath, PathStyle))
+ Suffix.push_back(C == '.' ? '@' : C);
+
+ sys::path::remove_filename(FilePath, PathStyle);
+ return (UseX86FastCall ? "_" : "__") +
+ utohexstr(djbHash(FilePath), /*LowerCase=*/false,
+ /*Width=*/8) +
+ "_" + Suffix;
+}
+
+void attachDebugInfo(GlobalVariable &GV, DISubprogram &SP) {
+ Module &M = *GV.getParent();
+ DICompileUnit *CU = SP.getUnit();
+ assert(CU);
+ DIBuilder DB(M, false, CU);
+
+ auto *DType =
+ DB.createBasicType("unsigned char", 8, dwarf::DW_ATE_unsigned_char,
+ llvm::DINode::FlagArtificial);
+
+ auto *DGVE = DB.createGlobalVariableExpression(
+ CU, GV.getName(), /*LinkageName=*/StringRef(), SP.getFile(),
+ /*LineNo=*/0, DType, /*IsLocalToUnit=*/true, /*IsDefined=*/true);
+ GV.addMetadata(LLVMContext::MD_dbg, *DGVE);
+ DB.finalize();
+}
+
+FunctionType *getCheckFunctionType(LLVMContext &Ctx) {
+ Type *VoidTy = Type::getVoidTy(Ctx);
+ PointerType *VoidPtrTy = Type::getInt8PtrTy(Ctx);
+ return FunctionType::get(VoidTy, VoidPtrTy, false);
+}
+
+Function *createDefaultCheckFunction(Module &M, bool UseX86FastCall) {
+ LLVMContext &Ctx = M.getContext();
+ const char *DefaultCheckFunctionName =
+ UseX86FastCall ? "_JustMyCode_Default" : "__JustMyCode_Default";
+ // Create the function.
+ Function *DefaultCheckFunc =
+ Function::Create(getCheckFunctionType(Ctx), GlobalValue::ExternalLinkage,
+ DefaultCheckFunctionName, &M);
+ DefaultCheckFunc->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ DefaultCheckFunc->addParamAttr(0, Attribute::NoUndef);
+ if (UseX86FastCall)
+ DefaultCheckFunc->addParamAttr(0, Attribute::InReg);
+
+ BasicBlock *EntryBB = BasicBlock::Create(Ctx, "", DefaultCheckFunc);
+ ReturnInst::Create(Ctx, EntryBB);
+ return DefaultCheckFunc;
+}
+} // namespace
+
+bool JMCInstrumenter::runOnModule(Module &M) {
+ bool Changed = false;
+ LLVMContext &Ctx = M.getContext();
+ Triple ModuleTriple(M.getTargetTriple());
+ bool IsMSVC = ModuleTriple.isKnownWindowsMSVCEnvironment();
+ bool IsELF = ModuleTriple.isOSBinFormatELF();
+ assert((IsELF || IsMSVC) && "Unsupported triple for JMC");
+ bool UseX86FastCall = IsMSVC && ModuleTriple.getArch() == Triple::x86;
+ const char *const FlagSymbolSection = IsELF ? ".just.my.code" : ".msvcjmc";
+
+ GlobalValue *CheckFunction = nullptr;
+ DenseMap<DISubprogram *, Constant *> SavedFlags(8);
+ for (auto &F : M) {
+ if (F.isDeclaration())
+ continue;
+ auto *SP = F.getSubprogram();
+ if (!SP)
+ continue;
+
+ Constant *&Flag = SavedFlags[SP];
+ if (!Flag) {
+ std::string FlagName = getFlagName(*SP, UseX86FastCall);
+ IntegerType *FlagTy = Type::getInt8Ty(Ctx);
+ Flag = M.getOrInsertGlobal(FlagName, FlagTy, [&] {
+ // FIXME: Put the GV in comdat and have linkonce_odr linkage to save
+ // .msvcjmc section space? maybe not worth it.
+ GlobalVariable *GV = new GlobalVariable(
+ M, FlagTy, /*isConstant=*/false, GlobalValue::InternalLinkage,
+ ConstantInt::get(FlagTy, 1), FlagName);
+ GV->setSection(FlagSymbolSection);
+ GV->setAlignment(Align(1));
+ GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ attachDebugInfo(*GV, *SP);
+ return GV;
+ });
+ }
+
+ if (!CheckFunction) {
+ Function *DefaultCheckFunc =
+ createDefaultCheckFunction(M, UseX86FastCall);
+ if (IsELF) {
+ DefaultCheckFunc->setName(CheckFunctionName);
+ DefaultCheckFunc->setLinkage(GlobalValue::WeakAnyLinkage);
+ CheckFunction = DefaultCheckFunc;
+ } else {
+ assert(!M.getFunction(CheckFunctionName) &&
+ "JMC instrument more than once?");
+ auto *CheckFunc = cast<Function>(
+ M.getOrInsertFunction(CheckFunctionName, getCheckFunctionType(Ctx))
+ .getCallee());
+ CheckFunc->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ CheckFunc->addParamAttr(0, Attribute::NoUndef);
+ if (UseX86FastCall) {
+ CheckFunc->setCallingConv(CallingConv::X86_FastCall);
+ CheckFunc->addParamAttr(0, Attribute::InReg);
+ }
+ CheckFunction = CheckFunc;
+
+ StringRef DefaultCheckFunctionName = DefaultCheckFunc->getName();
+ appendToUsed(M, {DefaultCheckFunc});
+ Comdat *C = M.getOrInsertComdat(DefaultCheckFunctionName);
+ C->setSelectionKind(Comdat::Any);
+ DefaultCheckFunc->setComdat(C);
+ // Add a linker option /alternatename to set the default implementation
+ // for the check function.
+ // https://devblogs.microsoft.com/oldnewthing/20200731-00/?p=104024
+ std::string AltOption = std::string("/alternatename:") +
+ CheckFunctionName + "=" +
+ DefaultCheckFunctionName.str();
+ llvm::Metadata *Ops[] = {llvm::MDString::get(Ctx, AltOption)};
+ MDTuple *N = MDNode::get(Ctx, Ops);
+ M.getOrInsertNamedMetadata("llvm.linker.options")->addOperand(N);
+ }
+ }
+ // FIXME: it would be nice to make CI a scheduling boundary, although in
+ // practice it does not matter much.
+ auto *CI = CallInst::Create(getCheckFunctionType(Ctx), CheckFunction,
+ {Flag}, "", &*F.begin()->getFirstInsertionPt());
+ CI->addParamAttr(0, Attribute::NoUndef);
+ if (UseX86FastCall) {
+ CI->setCallingConv(CallingConv::X86_FastCall);
+ CI->addParamAttr(0, Attribute::InReg);
+ }
+
+ Changed = true;
+ }
+ return Changed;
+}
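Conceptually, the pass rewrites every function that has a DISubprogram as if the following had been written at its entry; the flag symbol name is illustrative, and the '@' from the real mangling is replaced with '_' here to keep it a valid C identifier:

    extern "C" char __D032E919_foo_cpp;   // 1-byte flag in .msvcjmc,
                                          // initialized to 1
    extern "C" void __CheckForDebuggerJustMyCode(char *);

    void foo() {
      __CheckForDebuggerJustMyCode(&__D032E919_foo_cpp); // inserted call
      // ... original body ...
    }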
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index 0d3685d4141c..3192dcadb5f5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -23,20 +23,19 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
-static cl::opt<bool> EnableTrapUnreachable("trap-unreachable",
- cl::Hidden, cl::ZeroOrMore, cl::init(false),
- cl::desc("Enable generating trap for unreachable"));
+static cl::opt<bool>
+ EnableTrapUnreachable("trap-unreachable", cl::Hidden,
+ cl::desc("Enable generating trap for unreachable"));
void LLVMTargetMachine::initAsmInfo() {
MRI.reset(TheTarget.createMCRegInfo(getTargetTriple().str()));
@@ -99,7 +98,7 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T,
}
TargetTransformInfo
-LLVMTargetMachine::getTargetTransformInfo(const Function &F) {
+LLVMTargetMachine::getTargetTransformInfo(const Function &F) const {
return TargetTransformInfo(BasicTTIImpl(this, F));
}
@@ -164,22 +163,35 @@ Expected<std::unique_ptr<MCStreamer>> LLVMTargetMachine::createMCStreamer(
// Create a code emitter if asked to show the encoding.
std::unique_ptr<MCCodeEmitter> MCE;
if (Options.MCOptions.ShowMCEncoding)
- MCE.reset(getTarget().createMCCodeEmitter(MII, MRI, Context));
+ MCE.reset(getTarget().createMCCodeEmitter(MII, Context));
+
+ bool UseDwarfDirectory = false;
+ switch (Options.MCOptions.MCUseDwarfDirectory) {
+ case MCTargetOptions::DisableDwarfDirectory:
+ UseDwarfDirectory = false;
+ break;
+ case MCTargetOptions::EnableDwarfDirectory:
+ UseDwarfDirectory = true;
+ break;
+ case MCTargetOptions::DefaultDwarfDirectory:
+ UseDwarfDirectory = MAI.enableDwarfFileDirectoryDefault();
+ break;
+ }
std::unique_ptr<MCAsmBackend> MAB(
getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions));
auto FOut = std::make_unique<formatted_raw_ostream>(Out);
MCStreamer *S = getTarget().createAsmStreamer(
Context, std::move(FOut), Options.MCOptions.AsmVerbose,
- Options.MCOptions.MCUseDwarfDirectory, InstPrinter, std::move(MCE),
- std::move(MAB), Options.MCOptions.ShowMCInst);
+ UseDwarfDirectory, InstPrinter, std::move(MCE), std::move(MAB),
+ Options.MCOptions.ShowMCInst);
AsmStreamer.reset(S);
break;
}
case CGFT_ObjectFile: {
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
- MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, Context);
+ MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, Context);
if (!MCE)
return make_error<StringError>("createMCCodeEmitter failed",
inconvertibleErrorCode());
@@ -252,6 +264,9 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
"Cannot emit MC with limited codegen pipeline");
Ctx = &MMIWP->getMMI().getContext();
+ // libunwind is unable to load compact unwind dynamically, so we must generate
+ // DWARF unwind info for the JIT.
+ Options.MCOptions.EmitDwarfUnwind = EmitDwarfUnwindType::Always;
if (Options.MCOptions.MCSaveTempLabels)
Ctx->setAllowTemporaryLabels(false);
@@ -259,8 +274,7 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
// emission fails.
const MCSubtargetInfo &STI = *getMCSubtargetInfo();
const MCRegisterInfo &MRI = *getMCRegisterInfo();
- MCCodeEmitter *MCE =
- getTarget().createMCCodeEmitter(*getMCInstrInfo(), MRI, *Ctx);
+ MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getMCInstrInfo(), *Ctx);
MCAsmBackend *MAB =
getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions);
if (!MCE || !MAB)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
index 63a0d0c1c43e..39b44b917d9e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
@@ -14,6 +14,7 @@
///===---------------------------------------------------------------------===//
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/InitializePasses.h"
using namespace llvm;
@@ -87,7 +88,7 @@ LazyMachineBlockFrequencyInfoPass::calculateIfNotAvailable() const {
OwnedMBFI = std::make_unique<MachineBlockFrequencyInfo>();
OwnedMBFI->calculate(*MF, MBPI, *MLI);
- return *OwnedMBFI.get();
+ return *OwnedMBFI;
}
bool LazyMachineBlockFrequencyInfoPass::runOnMachineFunction(
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
index 6af5f07d801a..30ca8bd871e8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -84,21 +84,18 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/IteratedDominanceFrontier.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -106,27 +103,23 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
-#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/GenericIteratedDominanceFrontier.h"
#include "llvm/Support/TypeSize.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
#include <algorithm>
#include <cassert>
+#include <climits>
#include <cstdint>
#include <functional>
-#include <limits.h>
-#include <limits>
#include <queue>
#include <tuple>
#include <utility>
@@ -266,7 +259,7 @@ public:
/// object fields to track variable locations as we step through the block.
/// FIXME: could just examine mloctracker instead of passing in \p mlocs?
void
- loadInlocs(MachineBasicBlock &MBB, ValueIDNum *MLocs,
+ loadInlocs(MachineBasicBlock &MBB, ValueTable &MLocs,
const SmallVectorImpl<std::pair<DebugVariable, DbgValue>> &VLocs,
unsigned NumLocs) {
ActiveMLocs.clear();
@@ -729,6 +722,20 @@ MLocTracker::MLocTracker(MachineFunction &MF, const TargetInstrInfo &TII,
StackSlotIdxes.insert({{Size, Offs}, Idx});
}
+ // There may also be strange register class sizes (think x86 fp80s).
+ for (const TargetRegisterClass *RC : TRI.regclasses()) {
+ unsigned Size = TRI.getRegSizeInBits(*RC);
+
+ // We might see special reserved values as sizes, and classes for other
+ // stuff the machine tries to model. If it's more than 512 bits, then it
+ // is very unlikely to be a register that can be spilt.
+ if (Size > 512)
+ continue;
+
+ unsigned Idx = StackSlotIdxes.size();
+ StackSlotIdxes.insert({{Size, 0}, Idx});
+ }
+
for (auto &Idx : StackSlotIdxes)
StackIdxesToPos[Idx.second] = Idx.first;
@@ -863,19 +870,72 @@ MachineInstrBuilder MLocTracker::emitLoc(Optional<LocIdx> MLoc,
// the variable is.
if (Offset == 0) {
const SpillLoc &Spill = SpillLocs[SpillID.id()];
- Expr = TRI.prependOffsetExpression(Expr, DIExpression::ApplyOffset,
- Spill.SpillOffset);
unsigned Base = Spill.SpillBase;
MIB.addReg(Base);
- MIB.addImm(0);
- // Being on the stack makes this location indirect; if it was _already_
- // indirect though, we need to add extra indirection. See this test for
- // a scenario where this happens:
- // llvm/test/DebugInfo/X86/spill-nontrivial-param.ll
+ // There are several ways we can dereference things, and several inputs
+ // to consider:
+ // * NRVO variables will appear with IsIndirect set, but should have
+ // nothing else in their DIExpressions,
+ // * Variables with DW_OP_stack_value in their expr already need an
+ // explicit dereference of the stack location,
+ // * Values that don't match the variable size need DW_OP_deref_size,
+ // * Everything else can just become a simple location expression.
+
+ // We need to use deref_size whenever there's a mismatch between the
+ // size of value and the size of variable portion being read.
+ // Additionally, we should use it whenever dealing with stack_value
+ // fragments, to avoid the consumer having to determine the deref size
+ // from DW_OP_piece.
+ bool UseDerefSize = false;
+ unsigned ValueSizeInBits = getLocSizeInBits(*MLoc);
+ unsigned DerefSizeInBytes = ValueSizeInBits / 8;
+ if (auto Fragment = Var.getFragment()) {
+ unsigned VariableSizeInBits = Fragment->SizeInBits;
+ if (VariableSizeInBits != ValueSizeInBits || Expr->isComplex())
+ UseDerefSize = true;
+ } else if (auto Size = Var.getVariable()->getSizeInBits()) {
+ if (*Size != ValueSizeInBits) {
+ UseDerefSize = true;
+ }
+ }
+
if (Properties.Indirect) {
- std::vector<uint64_t> Elts = {dwarf::DW_OP_deref};
- Expr = DIExpression::append(Expr, Elts);
+ // This is something like an NRVO variable, where the pointer has been
+ // spilt to the stack, or a dbg.addr pointing at a coroutine frame
+ // field. It should end up being a memory location, with the pointer
+ // to the variable loaded off the stack with a deref. It can't be a
+ // DW_OP_stack_value expression.
+ assert(!Expr->isImplicit());
+ Expr = TRI.prependOffsetExpression(
+ Expr, DIExpression::ApplyOffset | DIExpression::DerefAfter,
+ Spill.SpillOffset);
+ MIB.addImm(0);
+ } else if (UseDerefSize) {
+ // We're loading a value off the stack that's not the same size as the
+ // variable. Add / subtract stack offset, explicitly deref with a size,
+ // and add DW_OP_stack_value if not already present.
+ SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size,
+ DerefSizeInBytes};
+ Expr = DIExpression::prependOpcodes(Expr, Ops, true);
+ unsigned Flags = DIExpression::StackValue | DIExpression::ApplyOffset;
+ Expr = TRI.prependOffsetExpression(Expr, Flags, Spill.SpillOffset);
+ MIB.addReg(0);
+ } else if (Expr->isComplex()) {
+ // A variable with no size ambiguity, but with extra elements in its
+ // expression. Manually dereference the stack location.
+ assert(Expr->isComplex());
+ Expr = TRI.prependOffsetExpression(
+ Expr, DIExpression::ApplyOffset | DIExpression::DerefAfter,
+ Spill.SpillOffset);
+ MIB.addReg(0);
+ } else {
+ // A plain value that has been spilt to the stack, with no further
+ // context. Request a location expression, marking the DBG_VALUE as
+ // IsIndirect.
+ Expr = TRI.prependOffsetExpression(Expr, DIExpression::ApplyOffset,
+ Spill.SpillOffset);
+ MIB.addImm(0);
}
} else {
// This is a stack location with a weird subregister offset: emit an undef
@@ -899,7 +959,7 @@ MachineInstrBuilder MLocTracker::emitLoc(Optional<LocIdx> MLoc,
}
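A worked example of the UseDerefSize path under assumed sizes (a 32-bit variable read out of a 64-bit spill slot): the expression is composed roughly as below, so the consumer reads exactly four bytes and treats the result as the variable's value rather than its address.

    // Sketch: append a sized dereference and DW_OP_stack_value; prepending
    // the spill offset is elided here.
    SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size, 4};
    Expr = DIExpression::prependOpcodes(Expr, Ops, /*StackValue=*/true);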
/// Default construct and initialize the pass.
-InstrRefBasedLDV::InstrRefBasedLDV() {}
+InstrRefBasedLDV::InstrRefBasedLDV() = default;
bool InstrRefBasedLDV::isCalleeSaved(LocIdx L) const {
unsigned Reg = MTracker->LocIdxToLocID[L];
@@ -1022,8 +1082,8 @@ bool InstrRefBasedLDV::transferDebugValue(const MachineInstr &MI) {
}
bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI,
- ValueIDNum **MLiveOuts,
- ValueIDNum **MLiveIns) {
+ const ValueTable *MLiveOuts,
+ const ValueTable *MLiveIns) {
if (!MI.isDebugRef())
return false;
@@ -1091,15 +1151,25 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI,
if (L)
NewID = ValueIDNum(BlockNo, InstrIt->second.second, *L);
} else if (OpNo != MachineFunction::DebugOperandMemNumber) {
- assert(OpNo < TargetInstr.getNumOperands());
- const MachineOperand &MO = TargetInstr.getOperand(OpNo);
-
- // Today, this can only be a register.
- assert(MO.isReg() && MO.isDef());
+ // Permit the debug-info to be completely wrong: identifying a nonexistent
+ // operand, or one that is not a register definition, means something
+ // unexpected happened during optimisation. Broken debug-info, however,
+ // shouldn't crash the compiler -- instead leave the variable value as
+ // None, which will make it appear "optimised out".
+ if (OpNo < TargetInstr.getNumOperands()) {
+ const MachineOperand &MO = TargetInstr.getOperand(OpNo);
+
+ if (MO.isReg() && MO.isDef() && MO.getReg()) {
+ unsigned LocID = MTracker->getLocID(MO.getReg());
+ LocIdx L = MTracker->LocIDToLocIdx[LocID];
+ NewID = ValueIDNum(BlockNo, InstrIt->second.second, L);
+ }
+ }
- unsigned LocID = MTracker->getLocID(MO.getReg());
- LocIdx L = MTracker->LocIDToLocIdx[LocID];
- NewID = ValueIDNum(BlockNo, InstrIt->second.second, L);
+ if (!NewID) {
+ LLVM_DEBUG(
+ { dbgs() << "Seen instruction reference to illegal operand\n"; });
+ }
}
// else: NewID is left as None.
} else if (PHIIt != DebugPHINumToValue.end() && PHIIt->InstrNum == InstNo) {
@@ -1249,7 +1319,16 @@ bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) {
const MachineOperand &MO = MI.getOperand(0);
unsigned InstrNum = MI.getOperand(1).getImm();
- if (MO.isReg()) {
+ auto EmitBadPHI = [this, &MI, InstrNum]() -> bool {
+ // Helper lambda to do any accounting when we fail to find a location for
+ // a DBG_PHI. This can happen if DBG_PHIs are malformed, or refer to a
+ // dead stack slot, for example.
+ // Record a DebugPHIRecord with an empty value + location.
+ DebugPHINumToValue.push_back({InstrNum, MI.getParent(), None, None});
+ return true;
+ };
+
+ if (MO.isReg() && MO.getReg()) {
// The value is whatever's currently in the register. Read and record it,
// to be analysed later.
Register Reg = MO.getReg();
@@ -1261,15 +1340,14 @@ bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) {
// Ensure this register is tracked.
for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
MTracker->lookupOrTrackRegister(*RAI);
- } else {
+ } else if (MO.isFI()) {
// The value is whatever's in this stack slot.
- assert(MO.isFI());
unsigned FI = MO.getIndex();
// If the stack slot is dead, then this was optimized away.
// FIXME: stack slot colouring should account for slots that get merged.
if (MFI->isDeadObjectIndex(FI))
- return true;
+ return EmitBadPHI();
// Identify this spill slot, ensure it's tracked.
Register Base;
@@ -1280,43 +1358,27 @@ bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) {
// We might be able to find a value, but have chosen not to, to avoid
// tracking too much stack information.
if (!SpillNo)
- return true;
+ return EmitBadPHI();
- // Problem: what value should we extract from the stack? LLVM does not
- // record what size the last store to the slot was, and it would become
- // sketchy after stack slot colouring anyway. Take a look at what values
- // are stored on the stack, and pick the largest one that wasn't def'd
- // by a spill (i.e., the value most likely to have been def'd in a register
- // and then spilt.
- std::array<unsigned, 4> CandidateSizes = {64, 32, 16, 8};
- Optional<ValueIDNum> Result = None;
- Optional<LocIdx> SpillLoc = None;
- for (unsigned CS : CandidateSizes) {
- unsigned SpillID = MTracker->getLocID(*SpillNo, {CS, 0});
- SpillLoc = MTracker->getSpillMLoc(SpillID);
- ValueIDNum Val = MTracker->readMLoc(*SpillLoc);
- // If this value was defined in it's own position, then it was probably
- // an aliasing index of a small value that was spilt.
- if (Val.getLoc() != SpillLoc->asU64()) {
- Result = Val;
- break;
- }
- }
+ // Any stack location DBG_PHI should have an associated bit-size.
+ assert(MI.getNumOperands() == 3 && "Stack DBG_PHI with no size?");
+ unsigned slotBitSize = MI.getOperand(2).getImm();
- // If we didn't find anything, we're probably looking at a PHI, or a memory
- // store folded into an instruction. FIXME: Take a guess that's it's 64
- // bits. This isn't ideal, but tracking the size that the spill is
- // "supposed" to be is more complex, and benefits a small number of
- // locations.
- if (!Result) {
- unsigned SpillID = MTracker->getLocID(*SpillNo, {64, 0});
- SpillLoc = MTracker->getSpillMLoc(SpillID);
- Result = MTracker->readMLoc(*SpillLoc);
- }
+ unsigned SpillID = MTracker->getLocID(*SpillNo, {slotBitSize, 0});
+ LocIdx SpillLoc = MTracker->getSpillMLoc(SpillID);
+ ValueIDNum Result = MTracker->readMLoc(SpillLoc);
// Record this DBG_PHI for later analysis.
- auto DbgPHI = DebugPHIRecord({InstrNum, MI.getParent(), *Result, *SpillLoc});
+ auto DbgPHI = DebugPHIRecord({InstrNum, MI.getParent(), Result, SpillLoc});
DebugPHINumToValue.push_back(DbgPHI);
+ } else {
+ // Else: if the operand is neither a legal register nor a stack slot, then
+ // we're being fed illegal debug-info. Record an empty PHI, so that any
+ // debug users trying to read this number will be put off trying to
+ // interpret the value.
+ LLVM_DEBUG(
+ { dbgs() << "Seen DBG_PHI with unrecognised operand format\n"; });
+ return EmitBadPHI();
}
return true;
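The stack-slot form of DBG_PHI now carries an explicit third operand giving the width of the value in the slot; the assumed MIR shape is:

    // DBG_PHI %stack.0, 1, 64
    //         ^ slot    ^ instr-num
    //                      ^ bit width, read back above via
    //                        MI.getOperand(2).getImm()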
@@ -1614,11 +1676,6 @@ bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) {
LocIdx SrcIdx = MTracker->getSpillMLoc(SpillID);
auto ReadValue = MTracker->readMLoc(SrcIdx);
MTracker->setReg(DestReg, ReadValue);
-
- if (TTracker) {
- LocIdx DstLoc = MTracker->getRegMLoc(DestReg);
- TTracker->transferMlocs(SrcIdx, DstLoc, MI.getIterator());
- }
};
for (MCSubRegIterator SRI(Reg, TRI, false); SRI.isValid(); ++SRI) {
@@ -1755,8 +1812,8 @@ void InstrRefBasedLDV::accumulateFragmentMap(MachineInstr &MI) {
AllSeenFragments.insert(ThisFragment);
}
-void InstrRefBasedLDV::process(MachineInstr &MI, ValueIDNum **MLiveOuts,
- ValueIDNum **MLiveIns) {
+void InstrRefBasedLDV::process(MachineInstr &MI, const ValueTable *MLiveOuts,
+ const ValueTable *MLiveIns) {
// Try to interpret an MI as a debug or transfer instruction. Only if it's
// none of these should we interpret it's register defs as new value
// definitions.
@@ -1806,7 +1863,10 @@ void InstrRefBasedLDV::produceMLocTransferFunction(
// Step through each instruction in this block.
for (auto &MI : MBB) {
- process(MI);
+ // Pass null value tables when accumulating the machine transfer function:
+ // they have not been allocated at this point.
+ process(MI, nullptr, nullptr);
+
// Also accumulate fragment map.
if (MI.isDebugValue() || MI.isDebugRef())
accumulateFragmentMap(MI);
@@ -1895,7 +1955,7 @@ void InstrRefBasedLDV::produceMLocTransferFunction(
bool InstrRefBasedLDV::mlocJoin(
MachineBasicBlock &MBB, SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
- ValueIDNum **OutLocs, ValueIDNum *InLocs) {
+ FuncValueTable &OutLocs, ValueTable &InLocs) {
LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n");
bool Changed = false;
@@ -1996,7 +2056,7 @@ void InstrRefBasedLDV::findStackIndexInterference(
void InstrRefBasedLDV::placeMLocPHIs(
MachineFunction &MF, SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks,
- ValueIDNum **MInLocs, SmallVectorImpl<MLocTransferMap> &MLocTransfer) {
+ FuncValueTable &MInLocs, SmallVectorImpl<MLocTransferMap> &MLocTransfer) {
SmallVector<unsigned, 4> StackUnits;
findStackIndexInterference(StackUnits);
@@ -2125,7 +2185,7 @@ void InstrRefBasedLDV::placeMLocPHIs(
}
void InstrRefBasedLDV::buildMLocValueMap(
- MachineFunction &MF, ValueIDNum **MInLocs, ValueIDNum **MOutLocs,
+ MachineFunction &MF, FuncValueTable &MInLocs, FuncValueTable &MOutLocs,
SmallVectorImpl<MLocTransferMap> &MLocTransfer) {
std::priority_queue<unsigned int, std::vector<unsigned int>,
std::greater<unsigned int>>
@@ -2267,7 +2327,7 @@ void InstrRefBasedLDV::BlockPHIPlacement(
Optional<ValueIDNum> InstrRefBasedLDV::pickVPHILoc(
const MachineBasicBlock &MBB, const DebugVariable &Var,
- const LiveIdxT &LiveOuts, ValueIDNum **MOutLocs,
+ const LiveIdxT &LiveOuts, FuncValueTable &MOutLocs,
const SmallVectorImpl<const MachineBasicBlock *> &BlockOrders) {
// Collect a set of locations from predecessor where its live-out value can
// be found.
@@ -2535,7 +2595,7 @@ void InstrRefBasedLDV::getBlocksForScope(
void InstrRefBasedLDV::buildVLocValueMap(
const DILocation *DILoc, const SmallSet<DebugVariable, 4> &VarsWeCareAbout,
SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks, LiveInsT &Output,
- ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
+ FuncValueTable &MOutLocs, FuncValueTable &MInLocs,
SmallVectorImpl<VLocTracker> &AllTheVLocs) {
// This method is much like buildMLocValueMap: but focuses on a single
// LexicalScope at a time. Pick out a set of blocks and variables that are
@@ -2920,7 +2980,7 @@ void InstrRefBasedLDV::makeDepthFirstEjectionMap(
bool InstrRefBasedLDV::depthFirstVLocAndEmit(
unsigned MaxNumBlocks, const ScopeToDILocT &ScopeToDILocation,
const ScopeToVarsT &ScopeToVars, ScopeToAssignBlocksT &ScopeToAssignBlocks,
- LiveInsT &Output, ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
+ LiveInsT &Output, FuncValueTable &MOutLocs, FuncValueTable &MInLocs,
SmallVectorImpl<VLocTracker> &AllTheVLocs, MachineFunction &MF,
DenseMap<DebugVariable, unsigned> &AllVarsNumbering,
const TargetPassConfig &TPC) {
@@ -2929,15 +2989,8 @@ bool InstrRefBasedLDV::depthFirstVLocAndEmit(
VTracker = nullptr;
// No scopes? No variable locations.
- if (!LS.getCurrentFunctionScope()) {
- // FIXME: this is a sticking plaster to prevent a memory leak, these
- // pointers will be automagically freed by being unique pointers, shortly.
- for (unsigned int I = 0; I < MaxNumBlocks; ++I) {
- delete[] MInLocs[I];
- delete[] MOutLocs[I];
- }
+ if (!LS.getCurrentFunctionScope())
return false;
- }
// Build map from block number to the last scope that uses the block.
SmallVector<unsigned, 16> EjectionMap;
@@ -2961,17 +3014,14 @@ bool InstrRefBasedLDV::depthFirstVLocAndEmit(
CurBB = BBNum;
CurInst = 1;
for (auto &MI : MBB) {
- process(MI, MOutLocs, MInLocs);
+ process(MI, MOutLocs.get(), MInLocs.get());
TTracker->checkInstForNewValues(CurInst, MI.getIterator());
++CurInst;
}
// Free machine-location tables for this block.
- delete[] MInLocs[BBNum];
- delete[] MOutLocs[BBNum];
- // Make ourselves brittle to use-after-free errors.
- MInLocs[BBNum] = nullptr;
- MOutLocs[BBNum] = nullptr;
+ MInLocs[BBNum].reset();
+ MOutLocs[BBNum].reset();
// We don't need live-in variable values for this block either.
Output[BBNum].clear();
AllTheVLocs[BBNum].clear();
@@ -3039,16 +3089,6 @@ bool InstrRefBasedLDV::depthFirstVLocAndEmit(
if (MOutLocs[MBB->getNumber()])
EjectBlock(*MBB);
- // Finally, there might have been gaps in the block numbering, from dead
- // blocks being deleted or folded. In those scenarios, we might allocate a
- // block-table that's never ejected, meaning we have to free it at the end.
- for (unsigned int I = 0; I < MaxNumBlocks; ++I) {
- if (MInLocs[I]) {
- delete[] MInLocs[I];
- delete[] MOutLocs[I];
- }
- }
-
return emitTransfers(AllVarsNumbering);
}
@@ -3135,24 +3175,24 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
assert(MaxNumBlocks >= 0);
++MaxNumBlocks;
+ initialSetup(MF);
+
MLocTransfer.resize(MaxNumBlocks);
vlocs.resize(MaxNumBlocks, VLocTracker(OverlapFragments, EmptyExpr));
SavedLiveIns.resize(MaxNumBlocks);
- initialSetup(MF);
-
produceMLocTransferFunction(MF, MLocTransfer, MaxNumBlocks);
// Allocate and initialize two array-of-arrays for the live-in and live-out
// machine values. The outer dimension is the block number; while the inner
// dimension is a LocIdx from MLocTracker.
- ValueIDNum **MOutLocs = new ValueIDNum *[MaxNumBlocks];
- ValueIDNum **MInLocs = new ValueIDNum *[MaxNumBlocks];
+ FuncValueTable MOutLocs = std::make_unique<ValueTable[]>(MaxNumBlocks);
+ FuncValueTable MInLocs = std::make_unique<ValueTable[]>(MaxNumBlocks);
unsigned NumLocs = MTracker->getNumLocs();
for (int i = 0; i < MaxNumBlocks; ++i) {
// These all auto-initialize to ValueIDNum::EmptyValue
- MOutLocs[i] = new ValueIDNum[NumLocs];
- MInLocs[i] = new ValueIDNum[NumLocs];
+ MOutLocs[i] = std::make_unique<ValueIDNum[]>(NumLocs);
+ MInLocs[i] = std::make_unique<ValueIDNum[]>(NumLocs);
}
// Solve the machine value dataflow problem using the MLocTransfer function,
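The unique_ptr-based tables above rely on nested owning arrays; a sketch of the presumed typedefs (from InstrRefBasedImpl.h) and their lifecycle, with NumBlocks and NumLocs as stand-ins:

    // Presumed: using ValueTable     = std::unique_ptr<ValueIDNum[]>;
    //           using FuncValueTable = std::unique_ptr<ValueTable[]>;
    FuncValueTable Tbl = std::make_unique<ValueTable[]>(NumBlocks);
    Tbl[0] = std::make_unique<ValueIDNum[]>(NumLocs); // one row per block
    Tbl[0].reset(); // frees a single block's row, as EjectBlock now does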
@@ -3165,7 +3205,10 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
// either live-through machine values, or PHIs.
for (auto &DBG_PHI : DebugPHINumToValue) {
// Identify unresolved block-live-ins.
- ValueIDNum &Num = DBG_PHI.ValueRead;
+ if (!DBG_PHI.ValueRead)
+ continue;
+
+ ValueIDNum &Num = *DBG_PHI.ValueRead;
if (!Num.isPHI())
continue;
@@ -3186,7 +3229,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
MTracker->loadFromArray(MInLocs[CurBB], CurBB);
CurInst = 1;
for (auto &MI : MBB) {
- process(MI, MOutLocs, MInLocs);
+ process(MI, MOutLocs.get(), MInLocs.get());
++CurInst;
}
MTracker->reset();
@@ -3241,12 +3284,6 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
<< " has " << MaxNumBlocks << " basic blocks and "
<< VarAssignCount
<< " variable assignments, exceeding limits.\n");
-
- // Perform memory cleanup that emitLocations would do otherwise.
- for (int Idx = 0; Idx < MaxNumBlocks; ++Idx) {
- delete[] MOutLocs[Idx];
- delete[] MInLocs[Idx];
- }
} else {
// Optionally, solve the variable value problem and emit to blocks by using
// a lexical-scope-depth search. It should be functionally identical to
@@ -3256,10 +3293,6 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
SavedLiveIns, MOutLocs, MInLocs, vlocs, MF, AllVarsNumbering, *TPC);
}
- // Elements of these arrays will be deleted by emitLocations.
- delete[] MOutLocs;
- delete[] MInLocs;
-
delete MTracker;
delete TTracker;
MTracker = nullptr;
@@ -3376,9 +3409,10 @@ public:
/// Machine location where any PHI must occur.
LocIdx Loc;
/// Table of live-in machine value numbers for blocks / locations.
- ValueIDNum **MLiveIns;
+ const ValueTable *MLiveIns;
- LDVSSAUpdater(LocIdx L, ValueIDNum **MLiveIns) : Loc(L), MLiveIns(MLiveIns) {}
+ LDVSSAUpdater(LocIdx L, const ValueTable *MLiveIns)
+ : Loc(L), MLiveIns(MLiveIns) {}
void reset() {
for (auto &Block : BlockMap)
@@ -3535,11 +3569,13 @@ public:
} // end namespace llvm
-Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIs(MachineFunction &MF,
- ValueIDNum **MLiveOuts,
- ValueIDNum **MLiveIns,
- MachineInstr &Here,
- uint64_t InstrNum) {
+Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIs(
+ MachineFunction &MF, const ValueTable *MLiveOuts,
+ const ValueTable *MLiveIns, MachineInstr &Here, uint64_t InstrNum) {
+ assert(MLiveOuts && MLiveIns &&
+ "Tried to resolve DBG_PHI before location "
+ "tables allocated?");
+
// This function will be called twice per DBG_INSTR_REF, and might end up
// computing lots of SSA information: memoize it.
auto SeenDbgPHIIt = SeenDbgPHIs.find(&Here);
@@ -3553,8 +3589,8 @@ Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIs(MachineFunction &MF,
}
Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl(
- MachineFunction &MF, ValueIDNum **MLiveOuts, ValueIDNum **MLiveIns,
- MachineInstr &Here, uint64_t InstrNum) {
+ MachineFunction &MF, const ValueTable *MLiveOuts,
+ const ValueTable *MLiveIns, MachineInstr &Here, uint64_t InstrNum) {
// Pick out records of DBG_PHI instructions that have been observed. If there
// are none, then we cannot compute a value number.
auto RangePair = std::equal_range(DebugPHINumToValue.begin(),
@@ -3566,17 +3602,24 @@ Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl(
if (LowerIt == UpperIt)
return None;
+ // If any DBG_PHIs referred to a location we didn't understand, don't try to
+ // compute a value. There might be scenarios where we could recover a value
+ // for some range of DBG_INSTR_REFs, but at this point we can have high
+ // confidence that we've seen a bug.
+ auto DBGPHIRange = make_range(LowerIt, UpperIt);
+ for (const DebugPHIRecord &DBG_PHI : DBGPHIRange)
+ if (!DBG_PHI.ValueRead)
+ return None;
+
// If there's only one DBG_PHI, then that is our value number.
if (std::distance(LowerIt, UpperIt) == 1)
- return LowerIt->ValueRead;
-
- auto DBGPHIRange = make_range(LowerIt, UpperIt);
+ return *LowerIt->ValueRead;
// Pick out the location (physreg, slot) where any PHIs must occur. It's
// technically possible for us to merge values in different registers in each
// block, but highly unlikely that LLVM will generate such code after register
// allocation.
- LocIdx Loc = LowerIt->ReadLoc;
+ LocIdx Loc = *LowerIt->ReadLoc;
// We have several DBG_PHIs, and a use position (the Here inst). All that
// each DBG_PHI does is identify a value at a program position. We can treat each
@@ -3595,7 +3638,7 @@ Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl(
// for the SSAUpdater.
for (const auto &DBG_PHI : DBGPHIRange) {
LDVSSABlock *Block = Updater.getSSALDVBlock(DBG_PHI.MBB);
- const ValueIDNum &Num = DBG_PHI.ValueRead;
+ const ValueIDNum &Num = *DBG_PHI.ValueRead;
AvailableValues.insert(std::make_pair(Block, Num.asU64()));
}
@@ -3629,7 +3672,7 @@ Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl(
// Define all the input DBG_PHI values in ValidatedValues.
for (const auto &DBG_PHI : DBGPHIRange) {
LDVSSABlock *Block = Updater.getSSALDVBlock(DBG_PHI.MBB);
- const ValueIDNum &Num = DBG_PHI.ValueRead;
+ const ValueIDNum &Num = *DBG_PHI.ValueRead;
ValidatedValues.insert(std::make_pair(Block, Num));
}
@@ -3654,7 +3697,7 @@ Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl(
return None;
ValueIDNum ValueToCheck;
- ValueIDNum *BlockLiveOuts = MLiveOuts[PHIIt.first->BB.getNumber()];
+ const ValueTable &BlockLiveOuts = MLiveOuts[PHIIt.first->BB.getNumber()];
auto VVal = ValidatedValues.find(PHIIt.first);
if (VVal == ValidatedValues.end()) {
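
These resolveDbgPHIs changes hinge on ValueRead becoming optional; the sketch below isolates the resulting guard pattern, with std::optional standing in for llvm::Optional and a toy record in place of DebugPHIRecord:

#include <optional>
#include <vector>

struct Record { std::optional<unsigned> ValueRead; };

std::optional<unsigned> resolve(const std::vector<Record> &Range) {
  // Any record whose value could not be read poisons the whole query.
  for (const Record &R : Range)
    if (!R.ValueRead)
      return std::nullopt;
  // A single record is its own answer; more would need PHI placement.
  if (Range.size() == 1)
    return *Range.front().ValueRead;
  return std::nullopt; // placeholder for the SSA-updater path
}
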
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
index d778561db471..70aae47c8bdc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
@@ -10,17 +10,14 @@
#define LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_INSTRREFBASEDLDV_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/UniqueVector.h"
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "LiveDebugValues.h"
@@ -171,6 +168,13 @@ public:
static ValueIDNum TombstoneValue;
};
+/// Type for a table of values in a block.
+using ValueTable = std::unique_ptr<ValueIDNum[]>;
+
+/// Type for a table-of-table-of-values, i.e., the collection of either
+/// live-in or live-out values for each block in the function.
+using FuncValueTable = std::unique_ptr<ValueTable[]>;
+
/// Thin wrapper around an integer -- designed to give more type safety to
/// spill location numbers.
class SpillLocationNo {
@@ -192,7 +196,7 @@ public:
};
/// Meta qualifiers for a value. Pair of whatever expression is used to qualify
-/// the the value, and Boolean of whether or not it's indirect.
+/// the value, and Boolean of whether or not it's indirect.
class DbgValueProperties {
public:
DbgValueProperties(const DIExpression *DIExpr, bool Indirect)
@@ -507,7 +511,7 @@ public:
/// Load values for each location from an array of ValueIDNums. Take current
/// bbnum just in case we read a value from a hitherto untouched register.
- void loadFromArray(ValueIDNum *Locs, unsigned NewCurBB) {
+ void loadFromArray(ValueTable &Locs, unsigned NewCurBB) {
CurBB = NewCurBB;
// Iterate over all tracked locations, and load each locations live-in
// value into our local index.
@@ -629,6 +633,19 @@ public:
/// Return true if Idx is a spill machine location.
bool isSpill(LocIdx Idx) const { return LocIdxToLocID[Idx] >= NumRegs; }
+ /// How large is this location (i.e., how wide is a value defined there?).
+ unsigned getLocSizeInBits(LocIdx L) const {
+ unsigned ID = LocIdxToLocID[L];
+ if (!isSpill(L)) {
+ return TRI.getRegSizeInBits(Register(ID), MF.getRegInfo());
+ } else {
+ // The slot location on the stack is uninteresting; we care about the
+ // position of the value within the slot (which comes with a size).
+ StackSlotPos Pos = locIDToSpillIdx(ID);
+ return Pos.first;
+ }
+ }
+
MLocIterator begin() { return MLocIterator(LocIdxToIDNum, 0); }
MLocIterator end() {
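
The new getLocSizeInBits branches on whether the location is a register or a spill; a reduced sketch of that decision, assuming (as the code above reads it) that the first element of the stack-slot pair carries the value's width in bits:

#include <utility>

using StackSlotPos = std::pair<unsigned, unsigned>; // assumed {size bits, offset}

unsigned locSizeInBits(bool IsSpill, unsigned RegSizeInBits, StackSlotPos Pos) {
  // Registers report their class width; spill locations report the width
  // of the value stored inside the slot, not the slot itself.
  return IsSpill ? Pos.first : RegSizeInBits;
}
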
@@ -851,10 +868,16 @@ private:
/// Record of where we observed a DBG_PHI instruction.
class DebugPHIRecord {
public:
- uint64_t InstrNum; ///< Instruction number of this DBG_PHI.
- MachineBasicBlock *MBB; ///< Block where DBG_PHI occurred.
- ValueIDNum ValueRead; ///< The value number read by the DBG_PHI.
- LocIdx ReadLoc; ///< Register/Stack location the DBG_PHI reads.
+ /// Instruction number of this DBG_PHI.
+ uint64_t InstrNum;
+ /// Block where DBG_PHI occurred.
+ MachineBasicBlock *MBB;
+ /// The value number read by the DBG_PHI -- or None if it didn't refer to
+ /// a value.
+ Optional<ValueIDNum> ValueRead;
+ /// Register/Stack location the DBG_PHI reads -- or None if it referred to
+ /// something unexpected.
+ Optional<LocIdx> ReadLoc;
operator unsigned() const { return InstrNum; }
};
@@ -909,8 +932,8 @@ private:
extractSpillBaseRegAndOffset(const MachineInstr &MI);
/// Observe a single instruction while stepping through a block.
- void process(MachineInstr &MI, ValueIDNum **MLiveOuts = nullptr,
- ValueIDNum **MLiveIns = nullptr);
+ void process(MachineInstr &MI, const ValueTable *MLiveOuts,
+ const ValueTable *MLiveIns);
/// Examines whether \p MI is a DBG_VALUE and notifies trackers.
/// \returns true if MI was recognized and processed.
@@ -918,8 +941,8 @@ private:
/// Examines whether \p MI is a DBG_INSTR_REF and notifies trackers.
/// \returns true if MI was recognized and processed.
- bool transferDebugInstrRef(MachineInstr &MI, ValueIDNum **MLiveOuts,
- ValueIDNum **MLiveIns);
+ bool transferDebugInstrRef(MachineInstr &MI, const ValueTable *MLiveOuts,
+ const ValueTable *MLiveIns);
/// Stores value-information about where this PHI occurred, and what
/// instruction number is associated with it.
@@ -951,13 +974,13 @@ private:
/// \p InstrNum Debug instruction number defined by DBG_PHI instructions.
/// \returns The machine value number at position Here, or None.
Optional<ValueIDNum> resolveDbgPHIs(MachineFunction &MF,
- ValueIDNum **MLiveOuts,
- ValueIDNum **MLiveIns, MachineInstr &Here,
- uint64_t InstrNum);
+ const ValueTable *MLiveOuts,
+ const ValueTable *MLiveIns,
+ MachineInstr &Here, uint64_t InstrNum);
Optional<ValueIDNum> resolveDbgPHIsImpl(MachineFunction &MF,
- ValueIDNum **MLiveOuts,
- ValueIDNum **MLiveIns,
+ const ValueTable *MLiveOuts,
+ const ValueTable *MLiveIns,
MachineInstr &Here,
uint64_t InstrNum);
@@ -975,8 +998,8 @@ private:
/// live-out arrays to the (initialized to zero) multidimensional arrays in
/// \p MInLocs and \p MOutLocs. The outer dimension is indexed by block
/// number, the inner by LocIdx.
- void buildMLocValueMap(MachineFunction &MF, ValueIDNum **MInLocs,
- ValueIDNum **MOutLocs,
+ void buildMLocValueMap(MachineFunction &MF, FuncValueTable &MInLocs,
+ FuncValueTable &MOutLocs,
SmallVectorImpl<MLocTransferMap> &MLocTransfer);
/// Examine the stack indexes (i.e. offsets within the stack) to find the
@@ -987,7 +1010,7 @@ private:
/// the IDF of each register.
void placeMLocPHIs(MachineFunction &MF,
SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks,
- ValueIDNum **MInLocs,
+ FuncValueTable &MInLocs,
SmallVectorImpl<MLocTransferMap> &MLocTransfer);
/// Propagate variable values to blocks in the common case where there's
@@ -1018,7 +1041,7 @@ private:
/// is true, revisiting this block is necessary.
bool mlocJoin(MachineBasicBlock &MBB,
SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
- ValueIDNum **OutLocs, ValueIDNum *InLocs);
+ FuncValueTable &OutLocs, ValueTable &InLocs);
/// Produce a set of blocks that are in the current lexical scope. This means
/// those blocks that contain instructions "in" the scope, blocks where
@@ -1046,11 +1069,11 @@ private:
/// scope, but which do contain DBG_VALUEs, which VarLocBasedImpl tracks
/// locations through.
void buildVLocValueMap(const DILocation *DILoc,
- const SmallSet<DebugVariable, 4> &VarsWeCareAbout,
- SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks,
- LiveInsT &Output, ValueIDNum **MOutLocs,
- ValueIDNum **MInLocs,
- SmallVectorImpl<VLocTracker> &AllTheVLocs);
+ const SmallSet<DebugVariable, 4> &VarsWeCareAbout,
+ SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks,
+ LiveInsT &Output, FuncValueTable &MOutLocs,
+ FuncValueTable &MInLocs,
+ SmallVectorImpl<VLocTracker> &AllTheVLocs);
/// Attempt to eliminate unnecessary PHIs on entry to a block. Examines the
/// live-in values coming from predecessors live-outs, and replaces any PHIs
@@ -1068,7 +1091,7 @@ private:
/// \returns Value ID of a machine PHI if an appropriate one is available.
Optional<ValueIDNum>
pickVPHILoc(const MachineBasicBlock &MBB, const DebugVariable &Var,
- const LiveIdxT &LiveOuts, ValueIDNum **MOutLocs,
+ const LiveIdxT &LiveOuts, FuncValueTable &MOutLocs,
const SmallVectorImpl<const MachineBasicBlock *> &BlockOrders);
/// Take collections of DBG_VALUE instructions stored in TTracker, and
@@ -1098,7 +1121,7 @@ private:
bool depthFirstVLocAndEmit(
unsigned MaxNumBlocks, const ScopeToDILocT &ScopeToDILocation,
const ScopeToVarsT &ScopeToVars, ScopeToAssignBlocksT &ScopeToBlocks,
- LiveInsT &Output, ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
+ LiveInsT &Output, FuncValueTable &MOutLocs, FuncValueTable &MInLocs,
SmallVectorImpl<VLocTracker> &AllTheVLocs, MachineFunction &MF,
DenseMap<DebugVariable, unsigned> &AllVarsNumbering,
const TargetPassConfig &TPC);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
index 40770b15aa35..141008ac2296 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
@@ -8,14 +8,16 @@
#include "LiveDebugValues.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetMachine.h"
/// \file LiveDebugValues.cpp
///
@@ -65,7 +67,7 @@ public:
static char ID;
LiveDebugValues();
- ~LiveDebugValues() {}
+ ~LiveDebugValues() = default;
/// Calculate the liveness information for the given machine function.
bool runOnMachineFunction(MachineFunction &MF) override;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
index 8f0b2ec3e1fc..6cc1685c0022 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
@@ -9,12 +9,11 @@
#ifndef LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H
#define LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/ADT/Triple.h"
-
namespace llvm {
+class MachineDominatorTree;
+class MachineFunction;
+class TargetPassConfig;
+class Triple;
// Inline namespace for types / symbols shared between different
// LiveDebugValues implementations.
@@ -28,7 +27,7 @@ public:
virtual bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree,
TargetPassConfig *TPC, unsigned InputBBLimit,
unsigned InputDbgValLimit) = 0;
- virtual ~LDVImpl() {}
+ virtual ~LDVImpl() = default;
};
} // namespace SharedLiveDebugValues
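
The LiveDebugValues.h change above is a standard header-hygiene move: a header that only names a type by reference or pointer can forward-declare it and let each .cpp include the full definition, cutting rebuild fan-out. A sketch of the pattern:

namespace llvm {
class MachineFunction; // forward declaration; no #include required here

class ExampleLDV {
public:
  // References and pointers to an incomplete type are fine in a header.
  bool ExtendRanges(MachineFunction &MF);
};
} // namespace llvm
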
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
index 42a0967bce3f..24c00b8a10ec 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
@@ -118,18 +118,15 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/UniqueVector.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -137,16 +134,11 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
-#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/TypeSize.h"
#include "llvm/Support/raw_ostream.h"
@@ -922,14 +914,14 @@ private:
std::unique_ptr<VarLocSet> &VLS = Locs[MBB];
if (!VLS)
VLS = std::make_unique<VarLocSet>(Alloc);
- return *VLS.get();
+ return *VLS;
}
const VarLocSet &getVarLocsInMBB(const MachineBasicBlock *MBB,
const VarLocInMBB &Locs) const {
auto It = Locs.find(MBB);
assert(It != Locs.end() && "MBB not in map");
- return *It->second.get();
+ return *It->second;
}
/// Tests whether this instruction is a spill to a stack location.
@@ -1035,9 +1027,9 @@ public:
// Implementation
//===----------------------------------------------------------------------===//
-VarLocBasedLDV::VarLocBasedLDV() { }
+VarLocBasedLDV::VarLocBasedLDV() = default;
-VarLocBasedLDV::~VarLocBasedLDV() { }
+VarLocBasedLDV::~VarLocBasedLDV() = default;
/// Erase a variable from the set of open ranges, and additionally erase any
/// fragments that may overlap it. If the VarLoc is a backup location, erase
@@ -1948,7 +1940,7 @@ bool VarLocBasedLDV::join(
// Just copy over the Out locs to incoming locs for the first visited
// predecessor, and for all other predecessors join the Out locs.
- VarLocSet &OutLocVLS = *OL->second.get();
+ VarLocSet &OutLocVLS = *OL->second;
if (!NumVisited)
InLocsT = OutLocVLS;
else
@@ -2007,7 +1999,7 @@ void VarLocBasedLDV::flushPendingLocs(VarLocInMBB &PendingInLocs,
for (auto &Iter : PendingInLocs) {
// Map is keyed on a constant pointer, unwrap it so we can insert insts.
auto &MBB = const_cast<MachineBasicBlock &>(*Iter.first);
- VarLocSet &Pending = *Iter.second.get();
+ VarLocSet &Pending = *Iter.second;
SmallVector<VarLoc, 32> VarLocs;
collectAllVarLocs(VarLocs, Pending, VarLocIDs);
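
Several hunks in VarLocBasedImpl.cpp replace *P.get() with *P and empty special-member bodies with "= default"; both are pure readability changes with identical behavior. A tiny sketch of the dereference form:

#include <memory>

struct VarLocSet { int Bits = 0; }; // stand-in

VarLocSet &deref(std::unique_ptr<VarLocSet> &P) {
  return *P; // identical to *P.get(), with less noise
}
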
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
index 6d806135240e..35cf25330186 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -28,6 +28,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
@@ -38,11 +39,9 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
@@ -50,15 +49,12 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/InitializePasses.h"
-#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <iterator>
@@ -976,7 +972,7 @@ void UserValue::extendDef(
if (Segment->end < Stop) {
Stop = Segment->end;
Kills = {Stop, {LII.first}};
- } else if (Segment->end == Stop && Kills.hasValue()) {
+ } else if (Segment->end == Stop && Kills) {
// If multiple locations end at the same place, track all of them in
// Kills.
Kills->second.push_back(LII.first);
@@ -1854,16 +1850,33 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
unsigned SpillSize, SpillOffset;
- // Test whether this location is legal with the given subreg.
+ unsigned regSizeInBits = TRI->getRegSizeInBits(*TRC);
+ if (SubReg)
+ regSizeInBits = TRI->getSubRegIdxSize(SubReg);
+
+ // Test whether this location is legal with the given subreg. If the
+ // subregister has a nonzero offset, drop this location; it's too complex
+ // to describe. (TODO: future work).
bool Success =
TII->getStackSlotRange(TRC, SubReg, SpillSize, SpillOffset, *MF);
- if (Success) {
+ if (Success && SpillOffset == 0) {
auto Builder = BuildMI(*OrigMBB, OrigMBB->begin(), DebugLoc(),
TII->get(TargetOpcode::DBG_PHI));
Builder.addFrameIndex(VRM->getStackSlot(Reg));
Builder.addImm(InstNum);
+ // Record how large the original value is. The stack slot might be
+ // merged and altered during optimisation, but we will want to know how
+ // large the value is at this DBG_PHI.
+ Builder.addImm(regSizeInBits);
+ }
+
+ LLVM_DEBUG(
+ if (SpillOffset != 0) {
+ dbgs() << "DBG_PHI for Vreg " << Reg << " subreg " << SubReg <<
+ " has nonzero offset\n";
}
+ );
}
// If there was no mapping for a value ID, it's optimized out. Create no
// DBG_PHI, and any variables using this value will become optimized out.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp
index 9ded0fb6ae0a..9378aaeb181c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp
@@ -348,23 +348,8 @@ private:
//===----------------------------------------------------------------------===//
LiveRange::iterator LiveRange::find(SlotIndex Pos) {
- // This algorithm is basically std::upper_bound.
- // Unfortunately, std::upper_bound cannot be used with mixed types until we
- // adopt C++0x. Many libraries can do it, but not all.
- if (empty() || Pos >= endIndex())
- return end();
- iterator I = begin();
- size_t Len = size();
- do {
- size_t Mid = Len >> 1;
- if (Pos < I[Mid].end) {
- Len = Mid;
- } else {
- I += Mid + 1;
- Len -= Mid + 1;
- }
- } while (Len);
- return I;
+ return llvm::partition_point(*this,
+ [&](const Segment &X) { return X.end <= Pos; });
}
VNInfo *LiveRange::createDeadDef(SlotIndex Def, VNInfo::Allocator &VNIAlloc) {
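
LiveRange::find's hand-rolled binary search becomes llvm::partition_point: segments are sorted by end, so the predicate X.end <= Pos holds for a prefix of the range, and the partition point is the first segment whose end lies beyond Pos. A standalone equivalent over std::vector:

#include <algorithm>
#include <vector>

struct Segment { unsigned Start, End; };

std::vector<Segment>::iterator find(std::vector<Segment> &Segs, unsigned Pos) {
  return std::partition_point(
      Segs.begin(), Segs.end(),
      [&](const Segment &X) { return X.End <= Pos; });
}
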
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp
index 2756086cb8b1..3176d73b35f6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp
@@ -11,13 +11,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LiveIntervalCalc.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -25,12 +21,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
#include <cassert>
-#include <iterator>
-#include <tuple>
-#include <utility>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp
index 50b31e1eb247..11a4ecf0bef9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp
@@ -26,7 +26,8 @@ using namespace llvm;
#define DEBUG_TYPE "regalloc"
// Merge a LiveInterval's segments. Guarantee no overlaps.
-void LiveIntervalUnion::unify(LiveInterval &VirtReg, const LiveRange &Range) {
+void LiveIntervalUnion::unify(const LiveInterval &VirtReg,
+ const LiveRange &Range) {
if (Range.empty())
return;
++Tag;
@@ -53,7 +54,8 @@ void LiveIntervalUnion::unify(LiveInterval &VirtReg, const LiveRange &Range) {
}
// Remove a live virtual register's segments from this union.
-void LiveIntervalUnion::extract(LiveInterval &VirtReg, const LiveRange &Range) {
+void LiveIntervalUnion::extract(const LiveInterval &VirtReg,
+ const LiveRange &Range) {
if (Range.empty())
return;
++Tag;
@@ -99,7 +101,7 @@ void LiveIntervalUnion::verify(LiveVirtRegBitSet& VisitedVRegs) {
}
#endif //!NDEBUG
-LiveInterval *LiveIntervalUnion::getOneVReg() const {
+const LiveInterval *LiveIntervalUnion::getOneVReg() const {
if (empty())
return nullptr;
for (LiveSegments::const_iterator SI = Segments.begin(); SI.valid(); ++SI) {
@@ -111,7 +113,8 @@ LiveInterval *LiveIntervalUnion::getOneVReg() const {
// Scan the vector of interfering virtual registers in this union. Assume it's
// quite small.
-bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
+bool LiveIntervalUnion::Query::isSeenInterference(
+ const LiveInterval *VirtReg) const {
return is_contained(InterferingVRegs, VirtReg);
}
@@ -147,14 +150,14 @@ LiveIntervalUnion::Query::collectInterferingVRegs(unsigned MaxInterferingRegs) {
}
LiveRange::const_iterator LREnd = LR->end();
- LiveInterval *RecentReg = nullptr;
+ const LiveInterval *RecentReg = nullptr;
while (LiveUnionI.valid()) {
assert(LRI != LREnd && "Reached end of LR");
// Check for overlapping interference.
while (LRI->start < LiveUnionI.stop() && LRI->end > LiveUnionI.start()) {
// This is an overlap, record the interfering register.
- LiveInterval *VReg = LiveUnionI.value();
+ const LiveInterval *VReg = LiveUnionI.value();
if (VReg != RecentReg && !isSeenInterference(VReg)) {
RecentReg = VReg;
InterferingVRegs.push_back(VReg);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
index 9571afa434c1..7d825a8bf853 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -33,22 +33,20 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
-#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/CodeGen/StackMaps.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -149,7 +147,7 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
getRegUnit(i);
}
LLVM_DEBUG(dump());
- return true;
+ return false;
}
void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
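
The return-value flip above is a correctness fix rather than a cleanup: runOnMachineFunction's boolean reports whether the pass modified the function, and LiveIntervals is an analysis that only computes data. A sketch of the convention (illustrative shape, not the real pass hierarchy):

struct ExampleAnalysisPass {
  bool runOnMachineFunction(/* MachineFunction &MF */) {
    // ... compute and cache liveness; the function itself is untouched ...
    return false; // "false" == no modification was made
  }
};
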
@@ -500,7 +498,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
// Create new live ranges with only minimal live segments per def.
LiveRange NewLR;
- createSegmentsForValues(NewLR, make_range(li->vni_begin(), li->vni_end()));
+ createSegmentsForValues(NewLR, li->vnis());
extendSegmentsToUses(NewLR, WorkList, Reg, LaneBitmask::getNone());
// Move the trimmed segments back.
@@ -604,7 +602,7 @@ void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, Register Reg) {
// Create a new live range with only minimal live segments per def.
LiveRange NewLR;
- createSegmentsForValues(NewLR, make_range(SR.vni_begin(), SR.vni_end()));
+ createSegmentsForValues(NewLR, SR.vnis());
extendSegmentsToUses(NewLR, WorkList, Reg, SR.LaneMask);
// Move the trimmed ranges back.
@@ -913,11 +911,11 @@ static bool hasLiveThroughUse(const MachineInstr *MI, Register Reg) {
return false;
}
-bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI,
+bool LiveIntervals::checkRegMaskInterference(const LiveInterval &LI,
BitVector &UsableRegs) {
if (LI.empty())
return false;
- LiveInterval::iterator LiveI = LI.begin(), LiveE = LI.end();
+ LiveInterval::const_iterator LiveI = LI.begin(), LiveE = LI.end();
// Use a smaller arrays for local live ranges.
ArrayRef<SlotIndex> Slots;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeCalc.cpp
index 3ef28042acb0..26f6e1ede1ad 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeCalc.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeCalc.cpp
@@ -20,11 +20,9 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/MC/LaneBitmask.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
index 05768140cbdf..58eb4110f153 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -371,7 +371,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
const MachineOperand &MO = MI->getOperand(i-1);
if (MO.isReg() && Register::isPhysicalRegister(MO.getReg()))
continue;
- MI->RemoveOperand(i-1);
+ MI->removeOperand(i-1);
}
LLVM_DEBUG(dbgs() << "Converted physregs to:\t" << *MI);
} else {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp
index 054f4370b609..8e56985246db 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp
@@ -23,7 +23,6 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp
index 4c0172a930b5..6ca7f00a7885 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp
@@ -78,13 +78,13 @@ void LiveRegMatrix::releaseMemory() {
template <typename Callable>
static bool foreachUnit(const TargetRegisterInfo *TRI,
- LiveInterval &VRegInterval, MCRegister PhysReg,
+ const LiveInterval &VRegInterval, MCRegister PhysReg,
Callable Func) {
if (VRegInterval.hasSubRanges()) {
for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
unsigned Unit = (*Units).first;
LaneBitmask Mask = (*Units).second;
- for (LiveInterval::SubRange &S : VRegInterval.subranges()) {
+ for (const LiveInterval::SubRange &S : VRegInterval.subranges()) {
if ((S.LaneMask & Mask).any()) {
if (Func(Unit, S))
return true;
@@ -101,7 +101,7 @@ static bool foreachUnit(const TargetRegisterInfo *TRI,
return false;
}
-void LiveRegMatrix::assign(LiveInterval &VirtReg, MCRegister PhysReg) {
+void LiveRegMatrix::assign(const LiveInterval &VirtReg, MCRegister PhysReg) {
LLVM_DEBUG(dbgs() << "assigning " << printReg(VirtReg.reg(), TRI) << " to "
<< printReg(PhysReg, TRI) << ':');
assert(!VRM->hasPhys(VirtReg.reg()) && "Duplicate VirtReg assignment");
@@ -118,7 +118,7 @@ void LiveRegMatrix::assign(LiveInterval &VirtReg, MCRegister PhysReg) {
LLVM_DEBUG(dbgs() << '\n');
}
-void LiveRegMatrix::unassign(LiveInterval &VirtReg) {
+void LiveRegMatrix::unassign(const LiveInterval &VirtReg) {
Register PhysReg = VRM->getPhys(VirtReg.reg());
LLVM_DEBUG(dbgs() << "unassigning " << printReg(VirtReg.reg(), TRI)
<< " from " << printReg(PhysReg, TRI) << ':');
@@ -143,7 +143,7 @@ bool LiveRegMatrix::isPhysRegUsed(MCRegister PhysReg) const {
return false;
}
-bool LiveRegMatrix::checkRegMaskInterference(LiveInterval &VirtReg,
+bool LiveRegMatrix::checkRegMaskInterference(const LiveInterval &VirtReg,
MCRegister PhysReg) {
// Check if the cached information is valid.
// The same BitVector can be reused for all PhysRegs.
@@ -161,7 +161,7 @@ bool LiveRegMatrix::checkRegMaskInterference(LiveInterval &VirtReg,
return !RegMaskUsable.empty() && (!PhysReg || !RegMaskUsable.test(PhysReg));
}
-bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg,
+bool LiveRegMatrix::checkRegUnitInterference(const LiveInterval &VirtReg,
MCRegister PhysReg) {
if (VirtReg.empty())
return false;
@@ -183,7 +183,8 @@ LiveIntervalUnion::Query &LiveRegMatrix::query(const LiveRange &LR,
}
LiveRegMatrix::InterferenceKind
-LiveRegMatrix::checkInterference(LiveInterval &VirtReg, MCRegister PhysReg) {
+LiveRegMatrix::checkInterference(const LiveInterval &VirtReg,
+ MCRegister PhysReg) {
if (VirtReg.empty())
return IK_Free;
@@ -237,7 +238,7 @@ bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End,
}
Register LiveRegMatrix::getOneVReg(unsigned PhysReg) const {
- LiveInterval *VRegInterval = nullptr;
+ const LiveInterval *VRegInterval = nullptr;
for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid(); ++Unit) {
if ((VRegInterval = Matrix[*Unit].getOneVReg()))
return VRegInterval->reg();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveStacks.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveStacks.cpp
index 8df84ebf4f06..8fc5a929d77b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveStacks.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveStacks.cpp
@@ -13,12 +13,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LiveStacks.h"
-#include "llvm/CodeGen/LiveIntervals.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
#define DEBUG_TYPE "livestacks"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index 37fd3e4853ac..5f54d7cc8472 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -23,7 +23,6 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -118,7 +117,7 @@ bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
// If the target doesn't want/need this pass, or if there are no locals
// to consider, early exit.
if (LocalObjectCount == 0 || !TRI->requiresVirtualBaseRegisters(MF))
- return true;
+ return false;
// Make sure we have enough space to store the local offsets.
LocalOffsets.resize(MFI.getObjectIndexEnd());
@@ -344,7 +343,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
MachineBasicBlock *Entry = &Fn.front();
- unsigned BaseReg = 0;
+ Register BaseReg;
int64_t BaseOffset = 0;
// Loop through the frame references and allocate for them as necessary.
@@ -414,20 +413,14 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
continue;
}
- const MachineFunction *MF = MI.getMF();
- const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF);
- BaseReg = Fn.getRegInfo().createVirtualRegister(RC);
-
- LLVM_DEBUG(dbgs() << " Materializing base register"
- << " at frame local offset "
- << LocalOffset + InstrOffset);
-
// Tell the target to insert the instruction to initialize
// the base register.
// MachineBasicBlock::iterator InsertionPt = Entry->begin();
BaseReg = TRI->materializeFrameBaseRegister(Entry, FrameIdx, InstrOffset);
- LLVM_DEBUG(dbgs() << " into " << printReg(BaseReg, TRI) << '\n');
+ LLVM_DEBUG(dbgs() << " Materialized base register at frame local offset "
+ << LocalOffset + InstrOffset
+ << " into " << printReg(BaseReg, TRI) << '\n');
// The base register already includes any offset specified
// by the instruction, so account for that so it doesn't get
@@ -437,7 +430,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
++NumBaseRegisters;
UsedBaseReg = true;
}
- assert(BaseReg != 0 && "Unable to allocate virtual base register!");
+ assert(BaseReg && "Unable to allocate virtual base register!");
// Modify the instruction to use the new base register rather
// than the frame index operand.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp
index dce64ab9f5ca..b47c96e50831 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp
@@ -15,7 +15,6 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
-#include "llvm/Support/raw_ostream.h"
using namespace llvm;
LLT llvm::getLLTForType(Type &Ty, const DataLayout &DL) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp
index a06d1d6255c7..984dc452fbfd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp
@@ -17,7 +17,6 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
index 3ec8c627f131..eea24d8e9353 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -27,15 +27,12 @@
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include <queue>
-
using namespace llvm;
#define DEBUG_TYPE "mir-canonicalizer"
@@ -106,10 +103,7 @@ rescheduleLexographically(std::vector<MachineInstr *> instructions,
StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
}
- llvm::sort(StringInstrMap,
- [](const StringInstrPair &a, const StringInstrPair &b) -> bool {
- return (a.first < b.first);
- });
+ llvm::sort(StringInstrMap, llvm::less_first());
for (auto &II : StringInstrMap) {
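
llvm::less_first() is shorthand for a comparator over .first; the replaced lambda spelled the same thing out. An equivalent standalone form:

#include <algorithm>
#include <string>
#include <utility>
#include <vector>

using StringInstrPair = std::pair<std::string, int>; // simplified payload

void sortByFirst(std::vector<StringInstrPair> &V) {
  std::sort(V.begin(), V.end(),
            [](const StringInstrPair &A, const StringInstrPair &B) {
              return A.first < B.first; // what less_first() expands to
            });
}
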
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRFSDiscriminator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
index bf78594e9b23..3152102410d7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
@@ -15,12 +15,14 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
-#include <unordered_map>
using namespace llvm;
using namespace sampleprof;
@@ -68,6 +70,8 @@ static uint64_t getCallStackHash(const MachineBasicBlock &BB,
bool MIRAddFSDiscriminators::runOnMachineFunction(MachineFunction &MF) {
if (!EnableFSDiscriminator)
return false;
+ if (!MF.getFunction().isDebugInfoForProfiling())
+ return false;
bool Changed = false;
using LocationDiscriminator = std::tuple<StringRef, unsigned, unsigned>;
@@ -131,6 +135,7 @@ bool MIRAddFSDiscriminators::runOnMachineFunction(MachineFunction &MF) {
if (Changed) {
createFSDiscriminatorVariable(MF.getFunction().getParent());
LLVM_DEBUG(dbgs() << "Num of FS Discriminators: " << NumNewD << "\n");
+ (void) NumNewD;
}
return Changed;
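
The added (void)NumNewD matters because the only read of NumNewD sits inside LLVM_DEBUG, which compiles to nothing in release builds; the cast marks the value as deliberately used. A sketch with a stand-in debug macro:

#include <cstdio>

#ifndef NDEBUG
#define EXAMPLE_DEBUG(X) do { X; } while (false)
#else
#define EXAMPLE_DEBUG(X) do { } while (false)
#endif

void report(unsigned NumNew) {
  EXAMPLE_DEBUG(std::printf("Num of FS Discriminators: %u\n", NumNew));
  (void)NumNew; // keeps -Wunused happy when EXAMPLE_DEBUG is compiled out
}
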
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRNamerPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRNamerPass.cpp
index 9f61dd9ef243..bc65700aba06 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRNamerPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRNamerPass.cpp
@@ -18,11 +18,7 @@
#include "MIRVRegNamerUtils.h"
#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/InitializePasses.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index 0ca820f160aa..b0daa20913f5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -15,7 +15,6 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
-#include <algorithm>
#include <cassert>
#include <cctype>
#include <string>
@@ -250,7 +249,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("dereferenceable", MIToken::kw_dereferenceable)
.Case("invariant", MIToken::kw_invariant)
.Case("align", MIToken::kw_align)
- .Case("basealign", MIToken::kw_align)
+ .Case("basealign", MIToken::kw_basealign)
.Case("addrspace", MIToken::kw_addrspace)
.Case("stack", MIToken::kw_stack)
.Case("got", MIToken::kw_got)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 6477965bdc21..40ae7053ea09 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -26,8 +26,6 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/AsmParser/SlotMapping.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MIRFormatter.h"
#include "llvm/CodeGen/MIRPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -38,6 +36,8 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterBank.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -60,7 +60,6 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
@@ -69,10 +68,8 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstddef>
@@ -744,7 +741,7 @@ bool MIParser::parseBasicBlockDefinition(
MBB->setIsEHPad(IsLandingPad);
MBB->setIsInlineAsmBrIndirectTarget(IsInlineAsmBrIndirectTarget);
MBB->setIsEHFuncletEntry(IsEHFuncletEntry);
- if (SectionID.hasValue()) {
+ if (SectionID) {
MBB->setSectionID(SectionID.getValue());
MF.setBBSectionsType(BasicBlockSection::List);
}
@@ -1094,11 +1091,23 @@ bool MIParser::parse(MachineInstr *&MI) {
return true;
}
- // TODO: Check for extraneous machine operands.
MI = MF.CreateMachineInstr(MCID, DebugLocation, /*NoImplicit=*/true);
MI->setFlags(Flags);
- for (const auto &Operand : Operands)
+
+ unsigned NumExplicitOps = 0;
+ for (const auto &Operand : Operands) {
+ bool IsImplicitOp = Operand.Operand.isReg() && Operand.Operand.isImplicit();
+ if (!IsImplicitOp) {
+ if (!MCID.isVariadic() && NumExplicitOps >= MCID.getNumOperands() &&
+ !Operand.Operand.isValidExcessOperand())
+ return error(Operand.Begin, "too many operands for instruction");
+
+ ++NumExplicitOps;
+ }
+
MI->addOperand(MF, Operand.Operand);
+ }
+
if (assignRegisterTies(*MI, Operands))
return true;
if (PreInstrSymbol)
@@ -1609,7 +1618,7 @@ bool MIParser::assignRegisterTies(MachineInstr &MI,
continue;
// The parser ensures that this operand is a register use, so we just have
// to check the tied-def operand.
- unsigned DefIdx = Operands[I].TiedDefIdx.getValue();
+ unsigned DefIdx = *Operands[I].TiedDefIdx;
if (DefIdx >= E)
return error(Operands[I].Begin,
Twine("use of invalid tied-def operand index '" +
@@ -1714,6 +1723,15 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest,
RegInfo->Kind == VRegInfo::REGBANK)
return error("generic virtual registers must have a type");
}
+
+ if (Flags & RegState::Define) {
+ if (Flags & RegState::Kill)
+ return error("cannot have a killed def operand");
+ } else {
+ if (Flags & RegState::Dead)
+ return error("cannot have a dead use operand");
+ }
+
Dest = MachineOperand::CreateReg(
Reg, Flags & RegState::Define, Flags & RegState::Implicit,
Flags & RegState::Kill, Flags & RegState::Dead, Flags & RegState::Undef,
@@ -2689,19 +2707,19 @@ bool MIParser::parseCustomRegisterMaskOperand(MachineOperand &Dest) {
return true;
uint32_t *Mask = MF.allocateRegMask();
- while (true) {
- if (Token.isNot(MIToken::NamedRegister))
- return error("expected a named register");
- Register Reg;
- if (parseNamedRegister(Reg))
- return true;
- lex();
- Mask[Reg / 32] |= 1U << (Reg % 32);
+ do {
+ if (Token.isNot(MIToken::rparen)) {
+ if (Token.isNot(MIToken::NamedRegister))
+ return error("expected a named register");
+ Register Reg;
+ if (parseNamedRegister(Reg))
+ return true;
+ lex();
+ Mask[Reg / 32] |= 1U << (Reg % 32);
+ }
+
// TODO: Report an error if the same register is used more than once.
- if (Token.isNot(MIToken::comma))
- break;
- lex();
- }
+ } while (consumeIfPresent(MIToken::comma));
if (expectAndConsume(MIToken::rparen))
return true;
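
The rewritten register-mask loop accepts an empty list and a bare ')' after a comma, where the old while(true) loop demanded a register on every iteration. A toy tokenizer showing the same do/while shape (assumes the token stream is well formed and terminated by ')'):

#include <string>
#include <vector>

bool parseRegList(const std::vector<std::string> &Toks, size_t &I,
                  std::vector<std::string> &Regs) {
  do {
    if (Toks[I] != ")") {
      Regs.push_back(Toks[I]); // "parse" one named register
      ++I;
    }
  } while (Toks[I] == "," ? (++I, true) : false); // consumeIfPresent(comma)
  return Toks[I] == ")"; // expectAndConsume(rparen)
}
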
@@ -3269,11 +3287,21 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
MDNode *Range = nullptr;
while (consumeIfPresent(MIToken::comma)) {
switch (Token.kind()) {
- case MIToken::kw_align:
+ case MIToken::kw_align: {
// align is printed if it is different than size.
- if (parseAlignment(BaseAlignment))
+ uint64_t Alignment;
+ if (parseAlignment(Alignment))
return true;
+ if (Ptr.Offset & (Alignment - 1)) {
+ // MachineMemOperand::getAlign never returns a value greater than the
+ // alignment of offset, so this just guards against hand-written MIR
+ // that specifies a large "align" value when it should probably use
+ // "basealign" instead.
+ return error("specified alignment is more aligned than offset");
+ }
+ BaseAlignment = Alignment;
break;
+ }
case MIToken::kw_basealign:
// basealign is printed if it is different than align.
if (parseAlignment(BaseAlignment))
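
The new "align" check rejects hand-written MIR whose offset cannot support the claimed alignment: an address is only as aligned as the low bits of its offset allow. The test in isolation (power-of-two alignment assumed, as in MIR):

#include <cassert>
#include <cstdint>

bool offsetSupportsAlignment(int64_t Offset, uint64_t Alignment) {
  assert(Alignment != 0 && (Alignment & (Alignment - 1)) == 0 &&
         "alignment must be a power of two");
  // Offset 12 supports align 4 (12 & 3 == 0) but not align 8 (12 & 7 == 4).
  return (Offset & (int64_t)(Alignment - 1)) == 0;
}
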
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index f144639770bc..4944cb46c5b5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -13,13 +13,10 @@
#include "llvm/CodeGen/MIRParser/MIRParser.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/AsmParser/SlotMapping.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -29,7 +26,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
@@ -46,6 +43,8 @@
using namespace llvm;
namespace llvm {
+class MDNode;
+class RegisterBank;
/// This class implements the parsing of LLVM IR that's embedded inside a MIR
/// file.
@@ -459,6 +458,12 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice);
MF.setHasWinCFI(YamlMF.HasWinCFI);
+ MF.setCallsEHReturn(YamlMF.CallsEHReturn);
+ MF.setCallsUnwindInit(YamlMF.CallsUnwindInit);
+ MF.setHasEHCatchret(YamlMF.HasEHCatchret);
+ MF.setHasEHScopes(YamlMF.HasEHScopes);
+ MF.setHasEHFunclets(YamlMF.HasEHFunclets);
+
if (YamlMF.Legalized)
MF.getProperties().set(MachineFunctionProperties::Property::Legalized);
if (YamlMF.RegBankSelected)
@@ -638,7 +643,7 @@ bool MIRParserImpl::parseRegisterInfo(PerFunctionMIParsingState &PFS,
// be saved for the caller).
if (YamlMF.CalleeSavedRegisters) {
SmallVector<MCPhysReg, 16> CalleeSavedRegisters;
- for (const auto &RegSource : YamlMF.CalleeSavedRegisters.getValue()) {
+ for (const auto &RegSource : *YamlMF.CalleeSavedRegisters) {
Register Reg;
if (parseNamedRegisterReference(PFS, Reg, RegSource.Value, Error))
return error(Error, RegSource.SourceRange);
@@ -809,7 +814,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
Object.CalleeSavedRestored, ObjectIdx))
return true;
if (Object.LocalOffset)
- MFI.mapLocalFrameObject(ObjectIdx, Object.LocalOffset.getValue());
+ MFI.mapLocalFrameObject(ObjectIdx, *Object.LocalOffset);
if (parseStackObjectsDebugInfo(PFS, Object, ObjectIdx))
return true;
}
@@ -826,6 +831,15 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
return error(Error, YamlMFI.StackProtector.SourceRange);
MFI.setStackProtectorIndex(FI);
}
+
+ if (!YamlMFI.FunctionContext.Value.empty()) {
+ SMDiagnostic Error;
+ int FI;
+ if (parseStackObjectReference(PFS, FI, YamlMFI.FunctionContext.Value, Error))
+ return error(Error, YamlMFI.FunctionContext.SourceRange);
+ MFI.setFunctionContextIndex(FI);
+ }
+
return false;
}
@@ -909,7 +923,7 @@ bool MIRParserImpl::initializeConstantPool(PerFunctionMIParsingState &PFS,
return error(Error, YamlConstant.Value.SourceRange);
const Align PrefTypeAlign =
M.getDataLayout().getPrefTypeAlign(Value->getType());
- const Align Alignment = YamlConstant.Alignment.getValueOr(PrefTypeAlign);
+ const Align Alignment = YamlConstant.Alignment.value_or(PrefTypeAlign);
unsigned Index = ConstantPool.getConstantPoolIndex(Value, Alignment);
if (!ConstantPoolSlots.insert(std::make_pair(YamlConstant.ID.Value, Index))
.second)
@@ -1023,7 +1037,7 @@ SMDiagnostic MIRParserImpl::diagFromBlockStringDiag(const SMDiagnostic &Error,
MIRParser::MIRParser(std::unique_ptr<MIRParserImpl> Impl)
: Impl(std::move(Impl)) {}
-MIRParser::~MIRParser() {}
+MIRParser::~MIRParser() = default;
std::unique_ptr<Module>
MIRParser::parseIRModule(DataLayoutCallbackTy DataLayoutCallback) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp
index dc72f83ad0e4..25823b1567f7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -13,14 +13,11 @@
#include "llvm/CodeGen/MIRPrinter.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -32,29 +29,19 @@
#include "llvm/CodeGen/MachineModuleSlotTracker.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRPrintingPasses.h"
-#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/LaneBitmask.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCDwarf.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -63,7 +50,6 @@
#include "llvm/Support/LowLevelTypeImpl.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
@@ -209,6 +195,12 @@ void MIRPrinter::print(const MachineFunction &MF) {
YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice();
YamlMF.HasWinCFI = MF.hasWinCFI();
+ YamlMF.CallsEHReturn = MF.callsEHReturn();
+ YamlMF.CallsUnwindInit = MF.callsUnwindInit();
+ YamlMF.HasEHCatchret = MF.hasEHCatchret();
+ YamlMF.HasEHScopes = MF.hasEHScopes();
+ YamlMF.HasEHFunclets = MF.hasEHFunclets();
+
YamlMF.Legalized = MF.getProperties().hasProperty(
MachineFunctionProperties::Property::Legalized);
YamlMF.RegBankSelected = MF.getProperties().hasProperty(
@@ -489,6 +481,12 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
.printStackObjectReference(MFI.getStackProtectorIndex());
}
+ if (MFI.hasFunctionContextIndex()) {
+ raw_string_ostream StrOS(YMF.FrameInfo.FunctionContext.Value);
+ MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
+ .printStackObjectReference(MFI.getFunctionContextIndex());
+ }
+
// Print the debug variable information.
for (const MachineFunction::VariableDbgInfo &DebugVar :
MF.getVariableDbgInfo()) {
@@ -693,11 +691,11 @@ void MIPrinter::print(const MachineBasicBlock &MBB) {
// Print the live in registers.
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
- if (MRI.tracksLiveness() && !MBB.livein_empty()) {
+ if (!MBB.livein_empty()) {
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
OS.indent(2) << "liveins: ";
bool First = true;
- for (const auto &LI : MBB.liveins()) {
+ for (const auto &LI : MBB.liveins_dbg()) {
if (!First)
OS << ", ";
First = false;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp
index b742ad9823c9..a8996a586909 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp
@@ -15,7 +15,15 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
index 5862504109f0..a2abe71a6bd7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
@@ -10,7 +10,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineStableHash.h"
#include "llvm/IR/Constants.h"
-#include "llvm/Support/Debug.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
index 33782c755eb0..7daf9025d303 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
@@ -10,17 +10,19 @@
//
//===----------------------------------------------------------------------===//
+#include "AllocationOrder.h"
#include "RegAllocEvictionAdvisor.h"
#include "RegAllocGreedy.h"
-#include "RegAllocScore.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MLModelRunner.h"
+#include "llvm/Analysis/TensorSpec.h"
+#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) || defined(LLVM_HAVE_TF_API)
#include "llvm/Analysis/ModelUnderTrainingRunner.h"
#include "llvm/Analysis/NoInferenceModelRunner.h"
+#endif
#include "llvm/Analysis/ReleaseModeModelRunner.h"
-#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -28,13 +30,11 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
-#include "llvm/Config/config.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetMachine.h"
#include <array>
#include <memory>
@@ -46,10 +46,16 @@ using namespace llvm;
// Generated header in release (AOT) mode
#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL)
#include "RegallocEvictModel.h"
+using CompiledModelType = RegallocEvictModel;
+#else
+using CompiledModelType = NoopSavedModelImpl;
#endif
// Options that only make sense in development mode
#ifdef LLVM_HAVE_TF_API
+#include "RegAllocScore.h"
+#include "llvm/Analysis/Utils/TFUtils.h"
+
static cl::opt<std::string> TrainingLog(
"regalloc-training-log", cl::Hidden,
cl::desc("Training log for the register allocator eviction model"));
@@ -60,6 +66,8 @@ static cl::opt<std::string> ModelUnderTraining(
#endif // #ifdef LLVM_HAVE_TF_API
+extern cl::opt<unsigned> EvictInterferenceCutoff;
+
/// The score injection pass.
/// This pass calculates the score for a function and inserts it in the log, but
/// this happens only in development mode. It's a no-op otherwise.
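
The CompiledModelType alias introduced above is what keeps the rest of the file free of conditionals: release and stub builds share one instantiation point, and only the template argument differs per build configuration. Sketched consumption, mirroring the getAdvisor() change further down (Ctx, InputFeatures and DecisionName as in those hunks):

    // Hedged sketch: one unconditional instantiation, regardless of whether an
    // AOT model or the NoopSavedModelImpl stub was selected at compile time.
    std::unique_ptr<llvm::MLModelRunner> Runner =
        std::make_unique<llvm::ReleaseModeModelRunner<CompiledModelType>>(
            Ctx, InputFeatures, DecisionName);
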
@@ -240,8 +248,8 @@ using FeaturesListNormalizer = std::array<float, FeatureIDs::FeatureCount>;
/// The ML evictor (commonalities between release and development mode)
class MLEvictAdvisor : public RegAllocEvictionAdvisor {
public:
- MLEvictAdvisor(MachineFunction &MF, const RAGreedy &RA, MLModelRunner *Runner,
- const MachineBlockFrequencyInfo &MBFI,
+ MLEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
+ MLModelRunner *Runner, const MachineBlockFrequencyInfo &MBFI,
const MachineLoopInfo &Loops);
protected:
@@ -257,14 +265,16 @@ protected:
/// if we're just capturing the log of the default advisor, it needs to call
/// the latter instead, so we need to pass all the necessary parameters for
/// it. In the development case, it will also log.
- virtual int64_t tryFindEvictionCandidatePosition(
- LiveInterval &VirtReg, const AllocationOrder &Order, unsigned OrderLimit,
- uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const;
+ virtual int64_t
+ tryFindEvictionCandidatePosition(const LiveInterval &VirtReg,
+ const AllocationOrder &Order,
+ unsigned OrderLimit, uint8_t CostPerUseLimit,
+ const SmallVirtRegSet &FixedRegisters) const;
/// Load the features of the given VirtReg (allocated or not) at column Pos,
/// but if that can't be evicted, return false instead.
bool
- loadInterferenceFeatures(LiveInterval &VirtReg, MCRegister PhysReg,
+ loadInterferenceFeatures(const LiveInterval &VirtReg, MCRegister PhysReg,
bool IsHint, const SmallVirtRegSet &FixedRegisters,
std::array<float, FeatureIDs::FeatureCount> &Largest,
size_t Pos) const;
@@ -273,24 +283,24 @@ private:
static float getInitialQueueSize(const MachineFunction &MF);
MCRegister tryFindEvictionCandidate(
- LiveInterval &VirtReg, const AllocationOrder &Order,
+ const LiveInterval &VirtReg, const AllocationOrder &Order,
uint8_t CostPerUseLimit,
const SmallVirtRegSet &FixedRegisters) const override;
- void extractFeatures(const SmallVectorImpl<LiveInterval *> &Intervals,
+ void extractFeatures(const SmallVectorImpl<const LiveInterval *> &Intervals,
std::array<float, FeatureIDs::FeatureCount> &Largest,
size_t Pos, int64_t IsHint, int64_t LocalIntfsCount,
float NrUrgent) const;
// Point-in-time: we didn't learn this, so we always delegate to the default.
bool canEvictHintInterference(
- LiveInterval &VirtReg, MCRegister PhysReg,
+ const LiveInterval &VirtReg, MCRegister PhysReg,
const SmallVirtRegSet &FixedRegisters) const override {
return getDefaultAdvisor().canEvictHintInterference(VirtReg, PhysReg,
FixedRegisters);
}
- const LIFeatureComponents
+ const LIFeatureComponents &
getLIFeatureComponents(const LiveInterval &LI) const;
// Hold on to a default advisor for:
@@ -306,17 +316,21 @@ private:
// This could be static and shared, but its initialization is non-trivial.
std::bitset<FeatureIDs::FeatureCount> DoNotNormalize;
const float InitialQSize;
+
+ using RegID = unsigned;
+ mutable DenseMap<RegID, LIFeatureComponents> CachedFeatures;
};
+#define _DECL_FEATURES(type, name, shape, _) \
+ TensorSpec::createSpec<type>(#name, shape),
+
+static const std::vector<TensorSpec> InputFeatures{
+ {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)},
+};
+#undef _DECL_FEATURES
// ===================================
// Release (AOT) - specifics
// ===================================
-#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL)
-const std::array<std::string, FeatureIDs::FeatureCount> FeatureNames{
-#define _GETNAME(_, NAME, __, ___) #NAME,
- RA_EVICT_FEATURES_LIST(_GETNAME)
-#undef _GETNAME
-};
class ReleaseModeEvictionAdvisorAnalysis final
: public RegAllocEvictionAdvisorAnalysis {
public:
@@ -335,17 +349,16 @@ private:
}
std::unique_ptr<RegAllocEvictionAdvisor>
- getAdvisor(MachineFunction &MF, const RAGreedy &RA) override {
+ getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
if (!Runner)
- Runner = std::make_unique<ReleaseModeModelRunner<RegallocEvictModel>>(
- MF.getFunction().getContext(), FeatureNames, DecisionName);
+ Runner = std::make_unique<ReleaseModeModelRunner<CompiledModelType>>(
+ MF.getFunction().getContext(), InputFeatures, DecisionName);
return std::make_unique<MLEvictAdvisor>(
MF, RA, Runner.get(), getAnalysis<MachineBlockFrequencyInfo>(),
getAnalysis<MachineLoopInfo>());
}
- std::unique_ptr<ReleaseModeModelRunner<RegallocEvictModel>> Runner;
+ std::unique_ptr<ReleaseModeModelRunner<CompiledModelType>> Runner;
};
-#endif
// ===================================
// Development mode-specifics
@@ -353,13 +366,6 @@ private:
//
// Features we log
#ifdef LLVM_HAVE_TF_API
-#define _DECL_FEATURES(type, name, shape, _) \
- TensorSpec::createSpec<type>(#name, shape),
-
-static const std::vector<TensorSpec> InputFeatures{
- {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)},
-};
-#undef _DECL_FEATURES
static const TensorSpec Output =
TensorSpec::createSpec<int64_t>(DecisionName, {1});
static const TensorSpec Reward = TensorSpec::createSpec<float>("reward", {1});
@@ -380,7 +386,7 @@ static const std::vector<TensorSpec> TrainingInputFeatures{
class DevelopmentModeEvictAdvisor : public MLEvictAdvisor {
public:
- DevelopmentModeEvictAdvisor(MachineFunction &MF, const RAGreedy &RA,
+ DevelopmentModeEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
MLModelRunner *Runner,
const MachineBlockFrequencyInfo &MBFI,
const MachineLoopInfo &Loops, Logger *Log)
@@ -388,8 +394,8 @@ public:
private:
int64_t tryFindEvictionCandidatePosition(
- LiveInterval &VirtReg, const AllocationOrder &Order, unsigned OrderLimit,
- uint8_t CostPerUseLimit,
+ const LiveInterval &VirtReg, const AllocationOrder &Order,
+ unsigned OrderLimit, uint8_t CostPerUseLimit,
const SmallVirtRegSet &FixedRegisters) const override;
Logger *const Log;
@@ -436,7 +442,7 @@ private:
}
std::unique_ptr<RegAllocEvictionAdvisor>
- getAdvisor(MachineFunction &MF, const RAGreedy &RA) override {
+ getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
LLVMContext &Ctx = MF.getFunction().getContext();
if (ModelUnderTraining.empty() && TrainingLog.empty()) {
Ctx.emitError("Regalloc development mode should be requested with at "
@@ -496,7 +502,7 @@ float MLEvictAdvisor::getInitialQueueSize(const MachineFunction &MF) {
return Ret;
}
-MLEvictAdvisor::MLEvictAdvisor(MachineFunction &MF, const RAGreedy &RA,
+MLEvictAdvisor::MLEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
MLModelRunner *Runner,
const MachineBlockFrequencyInfo &MBFI,
const MachineLoopInfo &Loops)
@@ -514,7 +520,7 @@ MLEvictAdvisor::MLEvictAdvisor(MachineFunction &MF, const RAGreedy &RA,
}
int64_t MLEvictAdvisor::tryFindEvictionCandidatePosition(
- LiveInterval &, const AllocationOrder &, unsigned, uint8_t,
+ const LiveInterval &, const AllocationOrder &, unsigned, uint8_t,
const SmallVirtRegSet &) const {
int64_t Ret = Runner->evaluate<int64_t>();
assert(Ret >= 0);
@@ -523,7 +529,7 @@ int64_t MLEvictAdvisor::tryFindEvictionCandidatePosition(
}
bool MLEvictAdvisor::loadInterferenceFeatures(
- LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
+ const LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
const SmallVirtRegSet &FixedRegisters, FeaturesListNormalizer &Largest,
size_t Pos) const {
// It is only possible to evict virtual register interference.
@@ -539,16 +545,18 @@ bool MLEvictAdvisor::loadInterferenceFeatures(
// The cascade tracking is the same as in the default advisor
unsigned Cascade = RA.getExtraInfo().getCascadeOrCurrentNext(VirtReg.reg());
- SmallVector<LiveInterval *, MaxInterferences> InterferingIntervals;
+ SmallVector<const LiveInterval *, MaxInterferences> InterferingIntervals;
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
// Different from the default heuristic, we don't make any assumptions about
// what it may mean when the query returns more than 10 results.
- const auto &IFIntervals = Q.interferingVRegs();
+ const auto &IFIntervals = Q.interferingVRegs(EvictInterferenceCutoff);
if (IFIntervals.empty() && InterferingIntervals.empty())
continue;
+ if (IFIntervals.size() >= EvictInterferenceCutoff)
+ return false;
InterferingIntervals.append(IFIntervals.begin(), IFIntervals.end());
- for (LiveInterval *Intf : reverse(IFIntervals)) {
+ for (const LiveInterval *Intf : reverse(IFIntervals)) {
assert(Register::isVirtualRegister(Intf->reg()) &&
"Only expecting virtual register interference from query");
// This is the same set of legality checks as in the default case: don't
@@ -587,7 +595,7 @@ bool MLEvictAdvisor::loadInterferenceFeatures(
}
MCRegister MLEvictAdvisor::tryFindEvictionCandidate(
- LiveInterval &VirtReg, const AllocationOrder &Order,
+ const LiveInterval &VirtReg, const AllocationOrder &Order,
uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const {
auto MaybeOrderLimit = getOrderLimit(VirtReg, Order, CostPerUseLimit);
if (!MaybeOrderLimit)
@@ -652,7 +660,7 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate(
// decision making process.
Regs[CandidateVirtRegPos].second = !MustFindEviction;
if (!MustFindEviction)
- extractFeatures(SmallVector<LiveInterval *, 1>(1, &VirtReg), Largest,
+ extractFeatures(SmallVector<const LiveInterval *, 1>(1, &VirtReg), Largest,
CandidateVirtRegPos, /*IsHint*/ 0, /*LocalIntfsCount*/ 0,
/*NrUrgent*/ 0.0);
assert(InitialQSize > 0.0 && "We couldn't have gotten here if we had "
@@ -686,9 +694,15 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate(
return Regs[CandidatePos].first;
}
-const LIFeatureComponents
+const LIFeatureComponents &
MLEvictAdvisor::getLIFeatureComponents(const LiveInterval &LI) const {
- LIFeatureComponents Ret;
+ RegID ID = LI.reg().id();
+ LIFeatureComponents Empty;
+ auto I = CachedFeatures.insert(std::make_pair(ID, Empty));
+ LIFeatureComponents &Ret = I.first->getSecond();
+ if (!I.second)
+ return Ret;
+
SmallPtrSet<MachineInstr *, 8> Visited;
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
@@ -733,7 +747,7 @@ MLEvictAdvisor::getLIFeatureComponents(const LiveInterval &LI) const {
// Overall, this currently mimics what we do for weight calculation, but instead
// of accumulating the various features, we keep them separate.
void MLEvictAdvisor::extractFeatures(
- const SmallVectorImpl<LiveInterval *> &Intervals,
+ const SmallVectorImpl<const LiveInterval *> &Intervals,
std::array<float, FeatureIDs::FeatureCount> &Largest, size_t Pos,
int64_t IsHint, int64_t LocalIntfsCount, float NrUrgent) const {
int64_t NrDefsAndUses = 0;
@@ -769,7 +783,7 @@ void MLEvictAdvisor::extractFeatures(
if (LI.endIndex() > EndSI)
EndSI = LI.endIndex();
- const LIFeatureComponents LIFC = getLIFeatureComponents(LI);
+ const LIFeatureComponents &LIFC = getLIFeatureComponents(LI);
NrBrokenHints += VRM->hasPreferredPhys(LI.reg());
NrDefsAndUses += LIFC.NrDefsAndUses;
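
The getLIFeatureComponents change above turns a recomputed by-value result into a per-register cache, leaning on the fact that DenseMap::insert returns an {iterator, inserted} pair. The idiom in isolation, with a stand-in value type:

    #include "llvm/ADT/DenseMap.h"
    #include <utility>

    // Minimal sketch of the insert-then-fill caching idiom; Features stands in
    // for LIFeatureComponents and the "work" is a placeholder.
    struct Features { float Weight = 0.0f; };

    const Features &getOrCompute(llvm::DenseMap<unsigned, Features> &Cache,
                                 unsigned ID) {
      auto I = Cache.insert(std::make_pair(ID, Features()));
      Features &Ret = I.first->second;
      if (!I.second)
        return Ret;      // hit: already computed for this register
      Ret.Weight = 1.0f; // miss: fill the slot in place
      return Ret;
    }

One caveat the advisor inherits from this idiom: the returned reference is only stable until a later insertion grows the map, so callers should consume it before triggering another cache fill.
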
@@ -831,8 +845,9 @@ RegAllocEvictionAdvisorAnalysis *llvm::createDevelopmentModeAdvisor() {
}
int64_t DevelopmentModeEvictAdvisor::tryFindEvictionCandidatePosition(
- LiveInterval &VirtReg, const AllocationOrder &Order, unsigned OrderLimit,
- uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const {
+ const LiveInterval &VirtReg, const AllocationOrder &Order,
+ unsigned OrderLimit, uint8_t CostPerUseLimit,
+ const SmallVirtRegSet &FixedRegisters) const {
int64_t Ret = 0;
if (isa<ModelUnderTrainingRunner>(getRunner())) {
Ret = MLEvictAdvisor::tryFindEvictionCandidatePosition(
@@ -885,11 +900,9 @@ bool RegAllocScoring::runOnMachineFunction(MachineFunction &MF) {
}
#endif // #ifdef LLVM_HAVE_TF_API
-#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL)
RegAllocEvictionAdvisorAnalysis *llvm::createReleaseModeAdvisor() {
return new ReleaseModeEvictionAdvisorAnalysis();
}
-#endif
// In all cases except development mode, we don't need scoring.
#if !defined(LLVM_HAVE_TF_API)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 8c9d00d08c6a..c186d0ba9969 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -11,8 +11,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -26,12 +26,10 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -53,8 +51,7 @@ MachineBasicBlock::MachineBasicBlock(MachineFunction &MF, const BasicBlock *B)
IrrLoopHeaderWeight = B->getIrrLoopHeaderWeight();
}
-MachineBasicBlock::~MachineBasicBlock() {
-}
+MachineBasicBlock::~MachineBasicBlock() = default;
/// Return the MCSymbol for this basic block.
MCSymbol *MachineBasicBlock::getSymbol() const {
@@ -135,7 +132,7 @@ void ilist_callback_traits<MachineBasicBlock>::addNodeToList(
// Make sure the instructions have their operands in the reginfo lists.
MachineRegisterInfo &RegInfo = MF.getRegInfo();
for (MachineInstr &MI : N->instrs())
- MI.AddRegOperandsToUseLists(RegInfo);
+ MI.addRegOperandsToUseLists(RegInfo);
}
void ilist_callback_traits<MachineBasicBlock>::removeNodeFromList(
@@ -153,7 +150,7 @@ void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) {
// Add the instruction's register operands to their corresponding
// use/def lists.
MachineFunction *MF = Parent->getParent();
- N->AddRegOperandsToUseLists(MF->getRegInfo());
+ N->addRegOperandsToUseLists(MF->getRegInfo());
MF->handleInsertion(*N);
}
@@ -165,7 +162,7 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
// Remove from the use/def lists.
if (MachineFunction *MF = N->getMF()) {
MF->handleRemoval(*N);
- N->RemoveRegOperandsFromUseLists(MF->getRegInfo());
+ N->removeRegOperandsFromUseLists(MF->getRegInfo());
}
N->setParent(nullptr);
@@ -918,6 +915,10 @@ bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const {
return std::next(I) == MachineFunction::const_iterator(MBB);
}
+const MachineBasicBlock *MachineBasicBlock::getSingleSuccessor() const {
+ return Successors.size() == 1 ? Successors[0] : nullptr;
+}
+
MachineBasicBlock *MachineBasicBlock::getFallThrough() {
MachineFunction::iterator Fallthrough = getIterator();
++Fallthrough;
@@ -1620,6 +1621,16 @@ MachineBasicBlock::liveout_iterator MachineBasicBlock::liveout_begin() const {
return liveout_iterator(*this, ExceptionPointer, ExceptionSelector, false);
}
+bool MachineBasicBlock::sizeWithoutDebugLargerThan(unsigned Limit) const {
+ unsigned Cntr = 0;
+ auto R = instructionsWithoutDebug(begin(), end());
+ for (auto I = R.begin(), E = R.end(); I != E; ++I) {
+ if (++Cntr > Limit)
+ return true;
+ }
+ return false;
+}
+
const MBBSectionID MBBSectionID::ColdSectionID(MBBSectionID::SectionType::Cold);
const MBBSectionID
MBBSectionID::ExceptionSectionID(MBBSectionID::SectionType::Exception);
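
Both new MachineBasicBlock helpers are small conveniences: getSingleSuccessor() mirrors the IR-level BasicBlock API, and sizeWithoutDebugLargerThan() answers "more than N real instructions?" while stopping as soon as the limit is crossed instead of counting the whole block. A hypothetical caller, purely illustrative:

    #include "llvm/CodeGen/MachineBasicBlock.h"
    using namespace llvm;

    // Illustrative profitability check; the threshold is an assumption.
    static bool worthDuplicating(const MachineBasicBlock &MBB) {
      const unsigned Limit = 4;
      if (MBB.sizeWithoutDebugLargerThan(Limit))
        return false; // bails out after Limit + 1 non-debug instructions
      return MBB.getSingleSuccessor() != nullptr;
    }
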
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index c93ffaabf74c..4cc84f22bdde 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -34,13 +34,13 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/MBFIWrapper.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/TailDuplicator.h"
@@ -50,6 +50,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/PrintPasses.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
@@ -200,10 +201,8 @@ static cl::opt<unsigned> TriangleChainCount(
cl::init(2),
cl::Hidden);
-static cl::opt<bool> EnableExtTspBlockPlacement(
- "enable-ext-tsp-block-placement", cl::Hidden, cl::init(false),
- cl::desc("Enable machine block placement based on the ext-tsp model, "
- "optimizing I-cache utilization."));
+extern cl::opt<bool> EnableExtTspBlockPlacement;
+extern cl::opt<bool> ApplyExtTspWithoutProfile;
namespace llvm {
extern cl::opt<unsigned> StaticLikelyProb;
@@ -3422,7 +3421,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
}
// Apply a post-processing optimizing block placement.
- if (MF.size() >= 3 && EnableExtTspBlockPlacement) {
+ if (MF.size() >= 3 && EnableExtTspBlockPlacement &&
+ (ApplyExtTspWithoutProfile || MF.getFunction().hasProfileData())) {
// Find a new placement and modify the layout of the blocks in the function.
applyExtTsp();
@@ -3660,6 +3660,9 @@ bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) {
if (std::next(F.begin()) == F.end())
return false;
+ if (!isFunctionInPrintList(F.getName()))
+ return false;
+
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index c9f762f9a6e7..a84377d70855 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -12,10 +12,8 @@
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp
index 0fcb07252d0e..e60fd9f7883a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp
@@ -34,7 +34,6 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
-#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegister.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
@@ -91,6 +90,11 @@ namespace {
AU.addPreserved<MachineBlockFrequencyInfo>();
}
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties()
+ .set(MachineFunctionProperties::Property::IsSSA);
+ }
+
void releaseMemory() override {
ScopeMap.clear();
PREMap.clear();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCheckDebugify.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCheckDebugify.cpp
index bd7f0f862947..1e5b8dd0bbb0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCheckDebugify.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCheckDebugify.cpp
@@ -11,13 +11,14 @@
/// DILocalVariable which mir-debugify generated before.
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Transforms/Utils/Debugify.h"
+#include "llvm/Pass.h"
#define DEBUG_TYPE "mir-check-debugify"
@@ -27,9 +28,6 @@ namespace {
struct CheckDebugMachineModule : public ModulePass {
bool runOnModule(Module &M) override {
- MachineModuleInfo &MMI =
- getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
-
NamedMDNode *NMD = M.getNamedMetadata("llvm.mir.debugify");
if (!NMD) {
errs() << "WARNING: Please run mir-debugify to generate "
@@ -37,6 +35,9 @@ struct CheckDebugMachineModule : public ModulePass {
return false;
}
+ MachineModuleInfo &MMI =
+ getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+
auto getDebugifyOperand = [&](unsigned Idx) -> unsigned {
return mdconst::extract<ConstantInt>(NMD->getOperand(Idx)->getOperand(0))
->getZExtValue();
@@ -106,8 +107,7 @@ struct CheckDebugMachineModule : public ModulePass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineModuleInfoWrapperPass>();
- AU.addPreserved<MachineModuleInfoWrapperPass>();
- AU.setPreservesCFG();
+ AU.setPreservesAll();
}
static char ID; // Pass identification.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp
index 72ab9ee4f388..722a709af240 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -21,7 +21,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -278,6 +277,8 @@ static CombinerObjective getCombinerObjective(MachineCombinerPattern P) {
case MachineCombinerPattern::REASSOC_XA_YB:
case MachineCombinerPattern::REASSOC_XY_AMM_BMM:
case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:
+ case MachineCombinerPattern::SUBADD_OP1:
+ case MachineCombinerPattern::SUBADD_OP2:
return CombinerObjective::MustReduceDepth;
case MachineCombinerPattern::REASSOC_XY_BCA:
case MachineCombinerPattern::REASSOC_XY_BAC:
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index 57fbe4112e47..66f0eb83e57c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -83,8 +83,24 @@ STATISTIC(NumCopyBackwardPropagated, "Number of copy defs backward propagated");
DEBUG_COUNTER(FwdCounter, "machine-cp-fwd",
"Controls which register COPYs are forwarded");
+static cl::opt<bool> MCPUseCopyInstr("mcp-use-is-copy-instr", cl::init(false),
+ cl::Hidden);
+
namespace {
+static Optional<DestSourcePair> isCopyInstr(const MachineInstr &MI,
+ const TargetInstrInfo &TII,
+ bool UseCopyInstr) {
+ if (UseCopyInstr)
+ return TII.isCopyInstr(MI);
+
+ if (MI.isCopy())
+ return Optional<DestSourcePair>(
+ DestSourcePair{MI.getOperand(0), MI.getOperand(1)});
+
+ return None;
+}
+
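
Everything below in this file is mechanical fallout from this one helper: callers stop assuming MI.isCopy() with operands 0/1 and instead go through the DestSourcePair view. Typical consumption, sketched; TII and UseCopyInstr come from the enclosing pass:

    // Hedged sketch of the call shape used throughout the rest of the pass.
    if (Optional<DestSourcePair> CopyOperands =
            isCopyInstr(MI, *TII, UseCopyInstr)) {
      Register Def = CopyOperands->Destination->getReg();
      Register Src = CopyOperands->Source->getReg();
      // ... operate on Def/Src rather than MI.getOperand(0)/getOperand(1).
    }
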
class CopyTracker {
struct CopyInfo {
MachineInstr *MI;
@@ -110,7 +126,8 @@ public:
}
/// Remove register from copy maps.
- void invalidateRegister(MCRegister Reg, const TargetRegisterInfo &TRI) {
+ void invalidateRegister(MCRegister Reg, const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII, bool UseCopyInstr) {
// Since Reg might be a subreg of some registers, invalidating only Reg is
// not enough. We have to find the COPY that defines Reg, or the registers
// defined by Reg, and invalidate all of them.
@@ -120,8 +137,13 @@ public:
auto I = Copies.find(*RUI);
if (I != Copies.end()) {
if (MachineInstr *MI = I->second.MI) {
- RegsToInvalidate.insert(MI->getOperand(0).getReg().asMCReg());
- RegsToInvalidate.insert(MI->getOperand(1).getReg().asMCReg());
+ Optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*MI, TII, UseCopyInstr);
+ assert(CopyOperands && "Expect copy");
+
+ RegsToInvalidate.insert(
+ CopyOperands->Destination->getReg().asMCReg());
+ RegsToInvalidate.insert(CopyOperands->Source->getReg().asMCReg());
}
RegsToInvalidate.insert(I->second.DefRegs.begin(),
I->second.DefRegs.end());
@@ -133,7 +155,8 @@ public:
}
/// Clobber a single register, removing it from the tracker's copy maps.
- void clobberRegister(MCRegister Reg, const TargetRegisterInfo &TRI) {
+ void clobberRegister(MCRegister Reg, const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII, bool UseCopyInstr) {
for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) {
auto I = Copies.find(*RUI);
if (I != Copies.end()) {
@@ -142,8 +165,12 @@ public:
markRegsUnavailable(I->second.DefRegs, TRI);
// When we clobber the destination of a copy, we need to clobber the
// whole register it defined.
- if (MachineInstr *MI = I->second.MI)
- markRegsUnavailable({MI->getOperand(0).getReg().asMCReg()}, TRI);
+ if (MachineInstr *MI = I->second.MI) {
+ Optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*MI, TII, UseCopyInstr);
+ markRegsUnavailable({CopyOperands->Destination->getReg().asMCReg()},
+ TRI);
+ }
// Now we can erase the copy.
Copies.erase(I);
}
@@ -151,11 +178,13 @@ public:
}
/// Add this copy's registers into the tracker's copy maps.
- void trackCopy(MachineInstr *MI, const TargetRegisterInfo &TRI) {
- assert(MI->isCopy() && "Tracking non-copy?");
+ void trackCopy(MachineInstr *MI, const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII, bool UseCopyInstr) {
+ Optional<DestSourcePair> CopyOperands = isCopyInstr(*MI, TII, UseCopyInstr);
+ assert(CopyOperands && "Tracking non-copy?");
- MCRegister Def = MI->getOperand(0).getReg().asMCReg();
- MCRegister Src = MI->getOperand(1).getReg().asMCReg();
+ MCRegister Src = CopyOperands->Source->getReg().asMCReg();
+ MCRegister Def = CopyOperands->Destination->getReg().asMCReg();
// Remember Def is defined by the copy.
for (MCRegUnitIterator RUI(Def, &TRI); RUI.isValid(); ++RUI)
@@ -198,15 +227,22 @@ public:
}
MachineInstr *findAvailBackwardCopy(MachineInstr &I, MCRegister Reg,
- const TargetRegisterInfo &TRI) {
+ const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII,
+ bool UseCopyInstr) {
MCRegUnitIterator RUI(Reg, &TRI);
MachineInstr *AvailCopy = findCopyDefViaUnit(*RUI, TRI);
- if (!AvailCopy ||
- !TRI.isSubRegisterEq(AvailCopy->getOperand(1).getReg(), Reg))
+
+ if (!AvailCopy)
+ return nullptr;
+
+ Optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*AvailCopy, TII, UseCopyInstr);
+ Register AvailSrc = CopyOperands->Source->getReg();
+ Register AvailDef = CopyOperands->Destination->getReg();
+ if (!TRI.isSubRegisterEq(AvailSrc, Reg))
return nullptr;
- Register AvailSrc = AvailCopy->getOperand(1).getReg();
- Register AvailDef = AvailCopy->getOperand(0).getReg();
for (const MachineInstr &MI :
make_range(AvailCopy->getReverseIterator(), I.getReverseIterator()))
for (const MachineOperand &MO : MI.operands())
@@ -219,20 +255,26 @@ public:
}
MachineInstr *findAvailCopy(MachineInstr &DestCopy, MCRegister Reg,
- const TargetRegisterInfo &TRI) {
+ const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII, bool UseCopyInstr) {
// We check the first RegUnit here, since we'll only be interested in the
// copy if it copies the entire register anyway.
MCRegUnitIterator RUI(Reg, &TRI);
MachineInstr *AvailCopy =
findCopyForUnit(*RUI, TRI, /*MustBeAvailable=*/true);
- if (!AvailCopy ||
- !TRI.isSubRegisterEq(AvailCopy->getOperand(0).getReg(), Reg))
+
+ if (!AvailCopy)
+ return nullptr;
+
+ Optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*AvailCopy, TII, UseCopyInstr);
+ Register AvailSrc = CopyOperands->Source->getReg();
+ Register AvailDef = CopyOperands->Destination->getReg();
+ if (!TRI.isSubRegisterEq(AvailDef, Reg))
return nullptr;
// Check that the available copy isn't clobbered by any regmasks between
// itself and the destination.
- Register AvailSrc = AvailCopy->getOperand(1).getReg();
- Register AvailDef = AvailCopy->getOperand(0).getReg();
for (const MachineInstr &MI :
make_range(AvailCopy->getIterator(), DestCopy.getIterator()))
for (const MachineOperand &MO : MI.operands())
@@ -253,10 +295,14 @@ class MachineCopyPropagation : public MachineFunctionPass {
const TargetInstrInfo *TII;
const MachineRegisterInfo *MRI;
+ // If true, recognize copy-like instructions via TII.isCopyInstr() instead
+ // of handling plain COPYs only.
+ bool UseCopyInstr;
+
public:
static char ID; // Pass identification, replacement for typeid
- MachineCopyPropagation() : MachineFunctionPass(ID) {
+ MachineCopyPropagation(bool CopyInstr = false)
+ : MachineFunctionPass(ID), UseCopyInstr(CopyInstr || MCPUseCopyInstr) {
initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry());
}
@@ -334,9 +380,13 @@ void MachineCopyPropagation::ReadRegister(MCRegister Reg, MachineInstr &Reader,
/// isNopCopy("ecx = COPY eax", AX, CX) == true
/// isNopCopy("ecx = COPY eax", AH, CL) == false
static bool isNopCopy(const MachineInstr &PreviousCopy, MCRegister Src,
- MCRegister Def, const TargetRegisterInfo *TRI) {
- MCRegister PreviousSrc = PreviousCopy.getOperand(1).getReg().asMCReg();
- MCRegister PreviousDef = PreviousCopy.getOperand(0).getReg().asMCReg();
+ MCRegister Def, const TargetRegisterInfo *TRI,
+ const TargetInstrInfo *TII, bool UseCopyInstr) {
+
+ Optional<DestSourcePair> CopyOperands =
+ isCopyInstr(PreviousCopy, *TII, UseCopyInstr);
+ MCRegister PreviousSrc = CopyOperands->Source->getReg().asMCReg();
+ MCRegister PreviousDef = CopyOperands->Destination->getReg().asMCReg();
if (Src == PreviousSrc && Def == PreviousDef)
return true;
if (!TRI->isSubRegister(PreviousSrc, Src))
@@ -356,22 +406,26 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy,
return false;
// Search for an existing copy.
- MachineInstr *PrevCopy = Tracker.findAvailCopy(Copy, Def, *TRI);
+ MachineInstr *PrevCopy =
+ Tracker.findAvailCopy(Copy, Def, *TRI, *TII, UseCopyInstr);
if (!PrevCopy)
return false;
+ auto PrevCopyOperands = isCopyInstr(*PrevCopy, *TII, UseCopyInstr);
// Check that the existing copy uses the correct sub registers.
- if (PrevCopy->getOperand(0).isDead())
+ if (PrevCopyOperands->Destination->isDead())
return false;
- if (!isNopCopy(*PrevCopy, Src, Def, TRI))
+ if (!isNopCopy(*PrevCopy, Src, Def, TRI, TII, UseCopyInstr))
return false;
LLVM_DEBUG(dbgs() << "MCP: copy is a NOP, removing: "; Copy.dump());
// Copy was redundantly redefining either Src or Def. Remove earlier kill
// flags between Copy and PrevCopy because the value will be reused now.
- assert(Copy.isCopy());
- Register CopyDef = Copy.getOperand(0).getReg();
+ Optional<DestSourcePair> CopyOperands = isCopyInstr(Copy, *TII, UseCopyInstr);
+ assert(CopyOperands);
+
+ Register CopyDef = CopyOperands->Destination->getReg();
assert(CopyDef == Src || CopyDef == Def);
for (MachineInstr &MI :
make_range(PrevCopy->getIterator(), Copy.getIterator()))
@@ -385,7 +439,9 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy,
bool MachineCopyPropagation::isBackwardPropagatableRegClassCopy(
const MachineInstr &Copy, const MachineInstr &UseI, unsigned UseIdx) {
- Register Def = Copy.getOperand(0).getReg();
+
+ Optional<DestSourcePair> CopyOperands = isCopyInstr(Copy, *TII, UseCopyInstr);
+ Register Def = CopyOperands->Destination->getReg();
if (const TargetRegisterClass *URC =
UseI.getRegClassConstraint(UseIdx, TII, TRI))
@@ -403,7 +459,8 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy,
const MachineInstr &UseI,
unsigned UseIdx) {
- Register CopySrcReg = Copy.getOperand(1).getReg();
+ Optional<DestSourcePair> CopyOperands = isCopyInstr(Copy, *TII, UseCopyInstr);
+ Register CopySrcReg = CopyOperands->Source->getReg();
// If the new register meets the opcode register constraints, then allow
// forwarding.
@@ -411,34 +468,10 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy,
UseI.getRegClassConstraint(UseIdx, TII, TRI))
return URC->contains(CopySrcReg);
- if (!UseI.isCopy())
+ auto UseICopyOperands = isCopyInstr(UseI, *TII, UseCopyInstr);
+ if (!UseICopyOperands)
return false;
- const TargetRegisterClass *CopySrcRC =
- TRI->getMinimalPhysRegClass(CopySrcReg);
- const TargetRegisterClass *UseDstRC =
- TRI->getMinimalPhysRegClass(UseI.getOperand(0).getReg());
- const TargetRegisterClass *CrossCopyRC = TRI->getCrossCopyRegClass(CopySrcRC);
-
- // If cross copy register class is not the same as copy source register class
- // then it is not possible to copy the register directly and requires a cross
- // register class copy. Forwarding this copy without checking register class of
- // UseDst may create additional cross register copies when expanding the copy
- // instruction in later passes.
- if (CopySrcRC != CrossCopyRC) {
- const TargetRegisterClass *CopyDstRC =
- TRI->getMinimalPhysRegClass(Copy.getOperand(0).getReg());
-
- // Check if UseDstRC matches the necessary register class to copy from
- // CopySrc's register class. If so then forwarding the copy will not
- // introduce any cross-class copies. Else if CopyDstRC matches then keep the
- // copy and do not forward. If neither UseDstRC nor CopyDstRC matches then
- // we may need a cross register copy later but we do not worry about it
- // here.
- if (UseDstRC != CrossCopyRC && CopyDstRC == CrossCopyRC)
- return false;
- }
-
/// COPYs don't have register class constraints, so if the user instruction
/// is a COPY, we just try to avoid introducing additional cross-class
/// COPYs. For example:
@@ -455,12 +488,34 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy,
///
/// so we have reduced the number of cross-class COPYs and potentially
/// introduced a nop COPY that can be removed.
- const TargetRegisterClass *SuperRC = UseDstRC;
- for (TargetRegisterClass::sc_iterator SuperRCI = UseDstRC->getSuperClasses();
- SuperRC; SuperRC = *SuperRCI++)
- if (SuperRC->contains(CopySrcReg))
- return true;
+ // Allow forwarding if src and dst belong to any common class, so long as they
+ // don't belong to any (possibly smaller) common class that requires copies to
+ // go via a different class.
+ Register UseDstReg = UseICopyOperands->Destination->getReg();
+ bool Found = false;
+ bool IsCrossClass = false;
+ for (const TargetRegisterClass *RC : TRI->regclasses()) {
+ if (RC->contains(CopySrcReg) && RC->contains(UseDstReg)) {
+ Found = true;
+ if (TRI->getCrossCopyRegClass(RC) != RC) {
+ IsCrossClass = true;
+ break;
+ }
+ }
+ }
+ if (!Found)
+ return false;
+ if (!IsCrossClass)
+ return true;
+ // The forwarded copy would be cross-class. Only do this if the original copy
+ // was also cross-class.
+ Register CopyDstReg = CopyOperands->Destination->getReg();
+ for (const TargetRegisterClass *RC : TRI->regclasses()) {
+ if (RC->contains(CopySrcReg) && RC->contains(CopyDstReg) &&
+ TRI->getCrossCopyRegClass(RC) != RC)
+ return true;
+ }
return false;
}
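
The replacement generalizes the old minimal-class walk: forwarding is acceptable when the copy's source and the use's destination share some register class that copies within itself, and a cross-class forward is only kept when the original copy was cross-class too. The first phase restated as a hypothetical standalone helper (the TRI queries are the real API; the wrapper itself is not):

    #include "llvm/CodeGen/Register.h"
    #include "llvm/CodeGen/TargetRegisterInfo.h"
    #include <utility>

    // Mirrors the Found/IsCrossClass scan above, including the break on the
    // first shared class that demands a cross-class copy.
    static std::pair<bool, bool>
    commonClassInfo(const llvm::TargetRegisterInfo &TRI, llvm::Register A,
                    llvm::Register B) {
      bool Found = false, IsCrossClass = false;
      for (const llvm::TargetRegisterClass *RC : TRI.regclasses()) {
        if (RC->contains(A) && RC->contains(B)) {
          Found = true;
          if (TRI.getCrossCopyRegClass(RC) != RC) {
            IsCrossClass = true;
            break;
          }
        }
      }
      return {Found, IsCrossClass};
    }
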
@@ -527,13 +582,15 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
if (!MOUse.isRenamable())
continue;
- MachineInstr *Copy =
- Tracker.findAvailCopy(MI, MOUse.getReg().asMCReg(), *TRI);
+ MachineInstr *Copy = Tracker.findAvailCopy(MI, MOUse.getReg().asMCReg(),
+ *TRI, *TII, UseCopyInstr);
if (!Copy)
continue;
- Register CopyDstReg = Copy->getOperand(0).getReg();
- const MachineOperand &CopySrc = Copy->getOperand(1);
+ Optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*Copy, *TII, UseCopyInstr);
+ Register CopyDstReg = CopyOperands->Destination->getReg();
+ const MachineOperand &CopySrc = *CopyOperands->Source;
Register CopySrcReg = CopySrc.getReg();
// FIXME: Don't handle partial uses of wider COPYs yet.
@@ -557,7 +614,8 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
// Check that the instruction is not a copy that partially overwrites the
// original copy source that we are about to use. The tracker mechanism
// cannot cope with that.
- if (MI.isCopy() && MI.modifiesRegister(CopySrcReg, TRI) &&
+ if (isCopyInstr(MI, *TII, UseCopyInstr) &&
+ MI.modifiesRegister(CopySrcReg, TRI) &&
!MI.definesRegister(CopySrcReg)) {
LLVM_DEBUG(dbgs() << "MCP: Copy source overlap with dest in " << MI);
continue;
@@ -596,76 +654,82 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
// Analyze copies (which don't overlap themselves).
- if (MI.isCopy() && !TRI->regsOverlap(MI.getOperand(0).getReg(),
- MI.getOperand(1).getReg())) {
- assert(MI.getOperand(0).getReg().isPhysical() &&
- MI.getOperand(1).getReg().isPhysical() &&
- "MachineCopyPropagation should be run after register allocation!");
-
- MCRegister Def = MI.getOperand(0).getReg().asMCReg();
- MCRegister Src = MI.getOperand(1).getReg().asMCReg();
-
- // The two copies cancel out and the source of the first copy
- // hasn't been overridden, eliminate the second one. e.g.
- // %ecx = COPY %eax
- // ... nothing clobbered eax.
- // %eax = COPY %ecx
- // =>
- // %ecx = COPY %eax
- //
- // or
- //
- // %ecx = COPY %eax
- // ... nothing clobbered eax.
- // %ecx = COPY %eax
- // =>
- // %ecx = COPY %eax
- if (eraseIfRedundant(MI, Def, Src) || eraseIfRedundant(MI, Src, Def))
- continue;
+ Optional<DestSourcePair> CopyOperands = isCopyInstr(MI, *TII, UseCopyInstr);
+ if (CopyOperands) {
+
+ Register RegSrc = CopyOperands->Source->getReg();
+ Register RegDef = CopyOperands->Destination->getReg();
+
+ if (!TRI->regsOverlap(RegDef, RegSrc)) {
+ assert(RegDef.isPhysical() && RegSrc.isPhysical() &&
+ "MachineCopyPropagation should be run after register allocation!");
+
+ MCRegister Def = RegDef.asMCReg();
+ MCRegister Src = RegSrc.asMCReg();
+
+ // The two copies cancel out and the source of the first copy
+ // hasn't been overridden, eliminate the second one. e.g.
+ // %ecx = COPY %eax
+ // ... nothing clobbered eax.
+ // %eax = COPY %ecx
+ // =>
+ // %ecx = COPY %eax
+ //
+ // or
+ //
+ // %ecx = COPY %eax
+ // ... nothing clobbered eax.
+ // %ecx = COPY %eax
+ // =>
+ // %ecx = COPY %eax
+ if (eraseIfRedundant(MI, Def, Src) || eraseIfRedundant(MI, Src, Def))
+ continue;
- forwardUses(MI);
+ forwardUses(MI);
+
+ // Src may have been changed by forwardUses()
+ CopyOperands = isCopyInstr(MI, *TII, UseCopyInstr);
+ Src = CopyOperands->Source->getReg().asMCReg();
+
+ // If Src is defined by a previous copy, the previous copy cannot be
+ // eliminated.
+ ReadRegister(Src, MI, RegularUse);
+ for (const MachineOperand &MO : MI.implicit_operands()) {
+ if (!MO.isReg() || !MO.readsReg())
+ continue;
+ MCRegister Reg = MO.getReg().asMCReg();
+ if (!Reg)
+ continue;
+ ReadRegister(Reg, MI, RegularUse);
+ }
- // Src may have been changed by forwardUses()
- Src = MI.getOperand(1).getReg().asMCReg();
+ LLVM_DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI.dump());
+
+ // Copy is now a candidate for deletion.
+ if (!MRI->isReserved(Def))
+ MaybeDeadCopies.insert(&MI);
+
+ // If 'Def' was previously the source of another copy, then this earlier
+ // copy's source is no longer available. e.g.
+ // %xmm9 = copy %xmm2
+ // ...
+ // %xmm2 = copy %xmm0
+ // ...
+ // %xmm2 = copy %xmm9
+ Tracker.clobberRegister(Def, *TRI, *TII, UseCopyInstr);
+ for (const MachineOperand &MO : MI.implicit_operands()) {
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ MCRegister Reg = MO.getReg().asMCReg();
+ if (!Reg)
+ continue;
+ Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr);
+ }
- // If Src is defined by a previous copy, the previous copy cannot be
- // eliminated.
- ReadRegister(Src, MI, RegularUse);
- for (const MachineOperand &MO : MI.implicit_operands()) {
- if (!MO.isReg() || !MO.readsReg())
- continue;
- MCRegister Reg = MO.getReg().asMCReg();
- if (!Reg)
- continue;
- ReadRegister(Reg, MI, RegularUse);
- }
+ Tracker.trackCopy(&MI, *TRI, *TII, UseCopyInstr);
- LLVM_DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI.dump());
-
- // Copy is now a candidate for deletion.
- if (!MRI->isReserved(Def))
- MaybeDeadCopies.insert(&MI);
-
- // If 'Def' is previously source of another copy, then this earlier copy's
- // source is no longer available. e.g.
- // %xmm9 = copy %xmm2
- // ...
- // %xmm2 = copy %xmm0
- // ...
- // %xmm2 = copy %xmm9
- Tracker.clobberRegister(Def, *TRI);
- for (const MachineOperand &MO : MI.implicit_operands()) {
- if (!MO.isReg() || !MO.isDef())
- continue;
- MCRegister Reg = MO.getReg().asMCReg();
- if (!Reg)
- continue;
- Tracker.clobberRegister(Reg, *TRI);
+ continue;
}
-
- Tracker.trackCopy(&MI, *TRI);
-
- continue;
}
// Clobber any earlyclobber regs first.
@@ -677,7 +741,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
// later.
if (MO.isTied())
ReadRegister(Reg, MI, RegularUse);
- Tracker.clobberRegister(Reg, *TRI);
+ Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr);
}
forwardUses(MI);
@@ -713,7 +777,9 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
MaybeDeadCopies.begin();
DI != MaybeDeadCopies.end();) {
MachineInstr *MaybeDead = *DI;
- MCRegister Reg = MaybeDead->getOperand(0).getReg().asMCReg();
+ Optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*MaybeDead, *TII, UseCopyInstr);
+ MCRegister Reg = CopyOperands->Destination->getReg().asMCReg();
assert(!MRI->isReserved(Reg));
if (!RegMask->clobbersPhysReg(Reg)) {
@@ -726,7 +792,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
// Make sure we invalidate any entries in the copy maps before erasing
// the instruction.
- Tracker.clobberRegister(Reg, *TRI);
+ Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr);
// erase() will return the next valid iterator pointing to the next
// element after the erased one.
@@ -739,7 +805,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
// Any previous copy definition or reading the Defs is no longer available.
for (MCRegister Reg : Defs)
- Tracker.clobberRegister(Reg, *TRI);
+ Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr);
}
// If MBB doesn't have successors, delete the copies whose defs are not used.
@@ -749,12 +815,16 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
for (MachineInstr *MaybeDead : MaybeDeadCopies) {
LLVM_DEBUG(dbgs() << "MCP: Removing copy due to no live-out succ: ";
MaybeDead->dump());
- assert(!MRI->isReserved(MaybeDead->getOperand(0).getReg()));
+
+ Optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*MaybeDead, *TII, UseCopyInstr);
+ assert(CopyOperands);
+
+ Register SrcReg = CopyOperands->Source->getReg();
+ Register DestReg = CopyOperands->Destination->getReg();
+ assert(!MRI->isReserved(DestReg));
// Update matching debug values, if any.
- assert(MaybeDead->isCopy());
- Register SrcReg = MaybeDead->getOperand(1).getReg();
- Register DestReg = MaybeDead->getOperand(0).getReg();
SmallVector<MachineInstr *> MaybeDeadDbgUsers(
CopyDbgUsers[MaybeDead].begin(), CopyDbgUsers[MaybeDead].end());
MRI->updateDbgUsersToReg(DestReg.asMCReg(), SrcReg.asMCReg(),
@@ -772,10 +842,14 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
}
static bool isBackwardPropagatableCopy(MachineInstr &MI,
- const MachineRegisterInfo &MRI) {
- assert(MI.isCopy() && "MI is expected to be a COPY");
- Register Def = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
+ const MachineRegisterInfo &MRI,
+ const TargetInstrInfo &TII,
+ bool UseCopyInstr) {
+ Optional<DestSourcePair> CopyOperands = isCopyInstr(MI, TII, UseCopyInstr);
+ assert(CopyOperands && "MI is expected to be a COPY");
+
+ Register Def = CopyOperands->Destination->getReg();
+ Register Src = CopyOperands->Source->getReg();
if (!Def || !Src)
return false;
@@ -783,7 +857,7 @@ static bool isBackwardPropagatableCopy(MachineInstr &MI,
if (MRI.isReserved(Def) || MRI.isReserved(Src))
return false;
- return MI.getOperand(1).isRenamable() && MI.getOperand(1).isKill();
+ return CopyOperands->Source->isRenamable() && CopyOperands->Source->isKill();
}
void MachineCopyPropagation::propagateDefs(MachineInstr &MI) {
@@ -808,13 +882,15 @@ void MachineCopyPropagation::propagateDefs(MachineInstr &MI) {
if (!MODef.isRenamable())
continue;
- MachineInstr *Copy =
- Tracker.findAvailBackwardCopy(MI, MODef.getReg().asMCReg(), *TRI);
+ MachineInstr *Copy = Tracker.findAvailBackwardCopy(
+ MI, MODef.getReg().asMCReg(), *TRI, *TII, UseCopyInstr);
if (!Copy)
continue;
- Register Def = Copy->getOperand(0).getReg();
- Register Src = Copy->getOperand(1).getReg();
+ Optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*Copy, *TII, UseCopyInstr);
+ Register Def = CopyOperands->Destination->getReg();
+ Register Src = CopyOperands->Source->getReg();
if (MODef.getReg() != Src)
continue;
@@ -833,7 +909,7 @@ void MachineCopyPropagation::propagateDefs(MachineInstr &MI) {
<< MI << " from " << *Copy);
MODef.setReg(Def);
- MODef.setIsRenamable(Copy->getOperand(0).isRenamable());
+ MODef.setIsRenamable(CopyOperands->Destination->isRenamable());
LLVM_DEBUG(dbgs() << "MCP: After replacement: " << MI << "\n");
MaybeDeadCopies.insert(Copy);
@@ -849,20 +925,23 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(MBB))) {
// Ignore non-trivial COPYs.
- if (MI.isCopy() && MI.getNumOperands() == 2 &&
- !TRI->regsOverlap(MI.getOperand(0).getReg(),
- MI.getOperand(1).getReg())) {
-
- MCRegister Def = MI.getOperand(0).getReg().asMCReg();
- MCRegister Src = MI.getOperand(1).getReg().asMCReg();
-
- // Unlike forward cp, we don't invoke propagateDefs here,
- // just let forward cp do COPY-to-COPY propagation.
- if (isBackwardPropagatableCopy(MI, *MRI)) {
- Tracker.invalidateRegister(Src, *TRI);
- Tracker.invalidateRegister(Def, *TRI);
- Tracker.trackCopy(&MI, *TRI);
- continue;
+ Optional<DestSourcePair> CopyOperands = isCopyInstr(MI, *TII, UseCopyInstr);
+ if (CopyOperands && MI.getNumOperands() == 2) {
+ Register DefReg = CopyOperands->Destination->getReg();
+ Register SrcReg = CopyOperands->Source->getReg();
+
+ if (!TRI->regsOverlap(DefReg, SrcReg)) {
+ MCRegister Def = DefReg.asMCReg();
+ MCRegister Src = SrcReg.asMCReg();
+
+ // Unlike forward cp, we don't invoke propagateDefs here,
+ // just let forward cp do COPY-to-COPY propagation.
+ if (isBackwardPropagatableCopy(MI, *MRI, *TII, UseCopyInstr)) {
+ Tracker.invalidateRegister(Src, *TRI, *TII, UseCopyInstr);
+ Tracker.invalidateRegister(Def, *TRI, *TII, UseCopyInstr);
+ Tracker.trackCopy(&MI, *TRI, *TII, UseCopyInstr);
+ continue;
+ }
}
}
@@ -872,7 +951,7 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
MCRegister Reg = MO.getReg().asMCReg();
if (!Reg)
continue;
- Tracker.invalidateRegister(Reg, *TRI);
+ Tracker.invalidateRegister(Reg, *TRI, *TII, UseCopyInstr);
}
propagateDefs(MI);
@@ -884,7 +963,8 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
continue;
if (MO.isDef())
- Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI);
+ Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI, *TII,
+ UseCopyInstr);
if (MO.readsReg()) {
if (MO.isDebug()) {
@@ -898,7 +978,8 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
}
}
} else {
- Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI);
+ Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI, *TII,
+ UseCopyInstr);
}
}
}
@@ -906,8 +987,10 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
for (auto *Copy : MaybeDeadCopies) {
- Register Src = Copy->getOperand(1).getReg();
- Register Def = Copy->getOperand(0).getReg();
+ Optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*Copy, *TII, UseCopyInstr);
+ Register Src = CopyOperands->Source->getReg();
+ Register Def = CopyOperands->Destination->getReg();
SmallVector<MachineInstr *> MaybeDeadDbgUsers(CopyDbgUsers[Copy].begin(),
CopyDbgUsers[Copy].end());
@@ -938,3 +1021,8 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
+
+MachineFunctionPass *
+llvm::createMachineCopyPropagationPass(bool UseCopyInstr = false) {
+ return new MachineCopyPropagation(UseCopyInstr);
+}
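
With the factory taking a flag, a target pipeline can opt into TII-driven copy recognition directly instead of relying on the hidden -mcp-use-is-copy-instr option. Illustrative call only; where this gets wired up is target-specific:

    // Hypothetical TargetPassConfig hook body requesting the
    // TII.isCopyInstr()-aware variant of the pass.
    addPass(createMachineCopyPropagationPass(/*UseCopyInstr=*/true));
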
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCycleAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCycleAnalysis.cpp
index 42a5e2b7af01..6871ac35b300 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCycleAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCycleAnalysis.cpp
@@ -8,50 +8,15 @@
#include "llvm/CodeGen/MachineCycleAnalysis.h"
#include "llvm/ADT/GenericCycleImpl.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineSSAContext.h"
-#include "llvm/InitializePasses.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
using namespace llvm;
template class llvm::GenericCycleInfo<llvm::MachineSSAContext>;
template class llvm::GenericCycle<llvm::MachineSSAContext>;
-namespace {
-
-/// Legacy analysis pass which computes a \ref MachineCycleInfo.
-class MachineCycleInfoWrapperPass : public MachineFunctionPass {
- MachineFunction *F = nullptr;
- MachineCycleInfo CI;
-
-public:
- static char ID;
-
- MachineCycleInfoWrapperPass();
-
- MachineCycleInfo &getCycleInfo() { return CI; }
- const MachineCycleInfo &getCycleInfo() const { return CI; }
-
- bool runOnMachineFunction(MachineFunction &F) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- void releaseMemory() override;
- void print(raw_ostream &OS, const Module *M = nullptr) const override;
-
- // TODO: verify analysis
-};
-
-class MachineCycleInfoPrinterPass : public MachineFunctionPass {
-public:
- static char ID;
-
- MachineCycleInfoPrinterPass();
-
- bool runOnMachineFunction(MachineFunction &F) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
-};
-
-} // namespace
-
char MachineCycleInfoWrapperPass::ID = 0;
MachineCycleInfoWrapperPass::MachineCycleInfoWrapperPass()
@@ -87,6 +52,16 @@ void MachineCycleInfoWrapperPass::releaseMemory() {
F = nullptr;
}
+class MachineCycleInfoPrinterPass : public MachineFunctionPass {
+public:
+ static char ID;
+
+ MachineCycleInfoPrinterPass();
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+
char MachineCycleInfoPrinterPass::ID = 0;
MachineCycleInfoPrinterPass::MachineCycleInfoPrinterPass()
@@ -111,3 +86,62 @@ bool MachineCycleInfoPrinterPass::runOnMachineFunction(MachineFunction &F) {
CI.print(errs());
return false;
}
+
+bool llvm::isCycleInvariant(const MachineCycle *Cycle, MachineInstr &I) {
+ MachineFunction *MF = I.getParent()->getParent();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+ const TargetSubtargetInfo &ST = MF->getSubtarget();
+ const TargetRegisterInfo *TRI = ST.getRegisterInfo();
+ const TargetInstrInfo *TII = ST.getInstrInfo();
+
+ // The instruction is cycle invariant if all of its operands are.
+ for (const MachineOperand &MO : I.operands()) {
+ if (!MO.isReg())
+ continue;
+
+ Register Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+
+ // An instruction that uses or defines a physical register can't e.g. be
+ // hoisted, so mark this as not invariant.
+ if (Register::isPhysicalRegister(Reg)) {
+ if (MO.isUse()) {
+ // If the physreg has no defs anywhere, it's just an ambient register
+ // and we can freely move its uses. Alternatively, if it's allocatable,
+ // it could get allocated to something with a def during allocation.
+ // However, if the physreg is known to always be caller saved/restored
+ // then this use is safe to hoist.
+ if (!MRI->isConstantPhysReg(Reg) &&
+ !(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *I.getMF())) &&
+ !TII->isIgnorableUse(MO))
+ return false;
+ // Otherwise it's safe to move.
+ continue;
+ } else if (!MO.isDead()) {
+ // A def that isn't dead can't be moved.
+ return false;
+ } else if (any_of(Cycle->getEntries(),
+ [&](const MachineBasicBlock *Block) {
+ return Block->isLiveIn(Reg);
+ })) {
+ // If the reg is live into any header of the cycle we can't hoist an
+ // instruction which would clobber it.
+ return false;
+ }
+ }
+
+ if (!MO.isUse())
+ continue;
+
+ assert(MRI->getVRegDef(Reg) && "Machine instr not mapped for this vreg?!");
+
+ // If the cycle contains the definition of an operand, then the instruction
+ // isn't cycle invariant.
+ if (Cycle->contains(MRI->getVRegDef(Reg)->getParent()))
+ return false;
+ }
+
+ // If we got this far, the instruction is cycle invariant!
+ return true;
+}
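
isCycleInvariant deliberately reports false on anything that might be unsafe to move: non-dead physreg defs, physreg uses that are neither constant nor ignorable, and vregs defined inside the cycle. A hypothetical caller, just to show the call shape; iterating a cycle's blocks this way is assumed from the GenericCycle interface:

    #include "llvm/CodeGen/MachineCycleAnalysis.h"

    // Counts instructions a hoisting client could consider moving; purely
    // illustrative, with no profitability or ordering checks.
    static unsigned countInvariant(const llvm::MachineCycle &Cycle) {
      unsigned N = 0;
      for (llvm::MachineBasicBlock *MBB : Cycle.blocks())
        for (llvm::MachineInstr &MI : *MBB)
          if (llvm::isCycleInvariant(&Cycle, MI))
            ++N;
      return N;
    }
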
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp
index 599a81847592..b726a032ca18 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp
@@ -16,14 +16,11 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/IR/DIBuilder.h"
-#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/InitializePasses.h"
#include "llvm/Transforms/Utils/Debugify.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp
index a39dc79baaa8..346cfedde390 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp
@@ -7,10 +7,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineDominanceFrontier.h"
-#include "llvm/Analysis/DominanceFrontierImpl.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp
index 28cff2a4f3f3..0632cde9c6f4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp
@@ -15,6 +15,8 @@
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
index 02f58ca5eef0..f58996ea90c6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
@@ -44,7 +44,6 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
@@ -61,7 +60,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DOTGraphTraits.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
@@ -109,6 +107,27 @@ static const char *getPropertyName(MachineFunctionProperties::Property Prop) {
llvm_unreachable("Invalid machine function property");
}
+void setUnsafeStackSize(const Function &F, MachineFrameInfo &FrameInfo) {
+ if (!F.hasFnAttribute(Attribute::SafeStack))
+ return;
+
+ auto *Existing =
+ dyn_cast_or_null<MDTuple>(F.getMetadata(LLVMContext::MD_annotation));
+
+ if (!Existing || Existing->getNumOperands() != 2)
+ return;
+
+ auto *MetadataName = "unsafe-stack-size";
+ if (auto &N = Existing->getOperand(0)) {
+ if (cast<MDString>(N.get())->getString() == MetadataName) {
+ if (auto &Op = Existing->getOperand(1)) {
+ auto Val = mdconst::extract<ConstantInt>(Op)->getZExtValue();
+ FrameInfo.setUnsafeStackSize(Val);
+ }
+ }
+ }
+}
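
// The metadata shape this helper expects is a two-operand !annotation tuple
// on the function. A sketch of matching IR (illustrative, not taken from
// this patch):
//
//   define void @f() safestack !annotation !0 { ... }
//   !0 = !{!"unsafe-stack-size", i64 32}
//
// Operand 0 must be the MDString "unsafe-stack-size" and operand 1 a
// ConstantInt whose zero-extended value becomes the frame's unsafe-stack
// size; any other shape is silently ignored.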
+
// Pin the vtable to this file.
void MachineFunction::Delegate::anchor() {}
@@ -133,11 +152,11 @@ void ilist_alloc_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
MBB->getParent()->deleteMachineBasicBlock(MBB);
}
-static inline unsigned getFnStackAlignment(const TargetSubtargetInfo *STI,
+static inline Align getFnStackAlignment(const TargetSubtargetInfo *STI,
const Function &F) {
if (auto MA = F.getFnStackAlign())
- return MA->value();
- return STI->getFrameLowering()->getStackAlign().value();
+ return *MA;
+ return STI->getFrameLowering()->getStackAlign();
}
MachineFunction::MachineFunction(Function &F, const LLVMTargetMachine &Target,
@@ -177,6 +196,8 @@ void MachineFunction::init() {
/*ForcedRealign=*/CanRealignSP &&
F.hasFnAttribute(Attribute::StackAlignment));
+ setUnsafeStackSize(F, *FrameInfo);
+
if (F.hasFnAttribute(Attribute::StackAlignment))
FrameInfo->ensureMaxAlignment(*F.getFnStackAlign());
@@ -208,9 +229,7 @@ void MachineFunction::init() {
"Can't create a MachineFunction using a Module with a "
"Target-incompatible DataLayout attached\n");
- PSVManager =
- std::make_unique<PseudoSourceValueManager>(*(getSubtarget().
- getInstrInfo()));
+ PSVManager = std::make_unique<PseudoSourceValueManager>(getTarget());
}
MachineFunction::~MachineFunction() {
@@ -837,25 +856,6 @@ void MachineFunction::addCleanup(MachineBasicBlock *LandingPad) {
LP.TypeIds.push_back(0);
}
-void MachineFunction::addSEHCatchHandler(MachineBasicBlock *LandingPad,
- const Function *Filter,
- const BlockAddress *RecoverBA) {
- LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
- SEHHandler Handler;
- Handler.FilterOrFinally = Filter;
- Handler.RecoverBA = RecoverBA;
- LP.SEHHandlers.push_back(Handler);
-}
-
-void MachineFunction::addSEHCleanupHandler(MachineBasicBlock *LandingPad,
- const Function *Cleanup) {
- LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
- SEHHandler Handler;
- Handler.FilterOrFinally = Cleanup;
- Handler.RecoverBA = nullptr;
- LP.SEHHandlers.push_back(Handler);
-}
-
void MachineFunction::setCallSiteLandingPad(MCSymbol *Sym,
ArrayRef<unsigned> Sites) {
LPadToCallSiteMap[Sym].append(Sites.begin(), Sites.end());
@@ -1012,7 +1012,32 @@ void MachineFunction::substituteDebugValuesForInst(const MachineInstr &Old,
}
}
-auto MachineFunction::salvageCopySSA(MachineInstr &MI)
+auto MachineFunction::salvageCopySSA(
+ MachineInstr &MI, DenseMap<Register, DebugInstrOperandPair> &DbgPHICache)
+ -> DebugInstrOperandPair {
+ const TargetInstrInfo &TII = *getSubtarget().getInstrInfo();
+
+ // Check whether this copy-like instruction has already been salvaged into
+ // an operand pair.
+ Register Dest;
+ if (auto CopyDstSrc = TII.isCopyInstr(MI)) {
+ Dest = CopyDstSrc->Destination->getReg();
+ } else {
+ assert(MI.isSubregToReg());
+ Dest = MI.getOperand(0).getReg();
+ }
+
+ auto CacheIt = DbgPHICache.find(Dest);
+ if (CacheIt != DbgPHICache.end())
+ return CacheIt->second;
+
+ // Calculate the instruction number to use, or install a DBG_PHI.
+ auto OperandPair = salvageCopySSAImpl(MI);
+ DbgPHICache.insert({Dest, OperandPair});
+ return OperandPair;
+}
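
// Illustrative effect of the cache, assuming two DBG_INSTR_REF users of the
// same copy destination %0: the first call misses, so salvageCopySSAImpl
// walks the copy chain (possibly installing a DBG_PHI); the second is a
// plain DenseMap hit, so %0 never receives duplicate DBG_PHIs.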
+
+auto MachineFunction::salvageCopySSAImpl(MachineInstr &MI)
-> DebugInstrOperandPair {
MachineRegisterInfo &MRI = getRegInfo();
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
@@ -1141,26 +1166,13 @@ auto MachineFunction::salvageCopySSA(MachineInstr &MI)
MachineBasicBlock &InsertBB = *CurInst->getParent();
// We reached the start of the block before finding a defining instruction.
- // It could be from a constant register, otherwise it must be an argument.
- if (TRI.isConstantPhysReg(State.first)) {
- // We can produce a DBG_PHI that identifies the constant physreg. Doesn't
- // matter where we put it, as it's constant valued.
- assert(CurInst->isCopy());
- } else if (State.first == TRI.getFrameRegister(*this)) {
- // LLVM IR is allowed to read the framepointer by calling a
- // llvm.frameaddress.* intrinsic. We can support this by emitting a
- // DBG_PHI $fp. This isn't ideal, because it extends the behaviours /
- // position that DBG_PHIs appear at, limiting what can be done later.
- // TODO: see if there's a better way of expressing these variable
- // locations.
- ;
- } else {
- // Assert that this is the entry block, or an EH pad. If it isn't, then
- // there is some code construct we don't recognise that deals with physregs
- // across blocks.
- assert(!State.first.isVirtual());
- assert(&*InsertBB.getParent()->begin() == &InsertBB || InsertBB.isEHPad());
- }
+ // There are numerous scenarios where this can happen:
+ // * Constant physical registers,
+ // * Several intrinsics that allow LLVM IR to read arbitrary registers,
+ // * Arguments in the entry block,
+ // * Exception handling landing pads.
+ // Validating all of them is too difficult, so just insert a DBG_PHI reading
+ // the variable value at this position, rather than checking that it makes
+ // sense.
// Create DBG_PHI for specified physreg.
auto Builder = BuildMI(InsertBB, InsertBB.getFirstNonPHI(), DebugLoc(),
@@ -1181,6 +1193,7 @@ void MachineFunction::finalizeDebugInstrRefs() {
MI.getOperand(1).ChangeToRegister(0, false);
};
+ DenseMap<Register, DebugInstrOperandPair> ArgDbgPHIs;
for (auto &MBB : *this) {
for (auto &MI : MBB) {
if (!MI.isDebugRef() || !MI.getOperand(0).isReg())
@@ -1203,7 +1216,7 @@ void MachineFunction::finalizeDebugInstrRefs() {
// instruction that defines the source value, see salvageCopySSA docs
// for why this is important.
if (DefMI.isCopyLike() || TII->isCopyInstr(DefMI)) {
- auto Result = salvageCopySSA(DefMI);
+ auto Result = salvageCopySSA(DefMI, ArgDbgPHIs);
MI.getOperand(0).ChangeToImmediate(Result.first);
MI.getOperand(1).setImm(Result.second);
} else {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp
index 16cde1f601f9..99494122d608 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp
@@ -17,6 +17,7 @@
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/CodeGen/MachineFunction.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
index 0e0eb8b8e00f..81c97ba6a086 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
@@ -24,7 +24,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/BasicBlockSectionUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -34,7 +33,6 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
@@ -82,7 +80,7 @@ static bool isColdBlock(const MachineBasicBlock &MBB,
const MachineBlockFrequencyInfo *MBFI,
ProfileSummaryInfo *PSI) {
Optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
- if (!Count.hasValue())
+ if (!Count)
return true;
if (PercentileCutoff > 0) {
@@ -108,9 +106,8 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
// We don't want to proceed further for cold functions
// or functions of unknown hotness. Lukewarm functions have no prefix.
Optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix();
- if (SectionPrefix.hasValue() &&
- (SectionPrefix.getValue().equals("unlikely") ||
- SectionPrefix.getValue().equals("unknown"))) {
+ if (SectionPrefix && (SectionPrefix.getValue().equals("unlikely") ||
+ SectionPrefix.getValue().equals("unknown"))) {
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
index 85b266afceef..31f45e194a97 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
@@ -11,19 +11,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -38,42 +33,30 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
-#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/IR/Operator.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/LowLevelTypeImpl.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
-#include <cstddef>
#include <cstdint>
#include <cstring>
-#include <iterator>
#include <utility>
using namespace llvm;
@@ -163,19 +146,13 @@ MachineRegisterInfo *MachineInstr::getRegInfo() {
return nullptr;
}
-/// RemoveRegOperandsFromUseLists - Unlink all of the register operands in
-/// this instruction from their respective use lists. This requires that the
-/// operands already be on their use lists.
-void MachineInstr::RemoveRegOperandsFromUseLists(MachineRegisterInfo &MRI) {
+void MachineInstr::removeRegOperandsFromUseLists(MachineRegisterInfo &MRI) {
for (MachineOperand &MO : operands())
if (MO.isReg())
MRI.removeRegOperandFromUseList(&MO);
}
-/// AddRegOperandsToUseLists - Add all of the register operands in
-/// this instruction from their respective use lists. This requires that the
-/// operands not be on their use lists yet.
-void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &MRI) {
+void MachineInstr::addRegOperandsToUseLists(MachineRegisterInfo &MRI) {
for (MachineOperand &MO : operands())
if (MO.isReg())
MRI.addRegOperandToUseList(&MO);
@@ -232,16 +209,12 @@ void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) {
}
}
-#ifndef NDEBUG
- bool isDebugOp = Op.getType() == MachineOperand::MO_Metadata ||
- Op.getType() == MachineOperand::MO_MCSymbol;
// OpNo now points as the desired insertion point. Unless this is a variadic
// instruction, only implicit regs are allowed beyond MCID->getNumOperands().
// RegMask operands go between the explicit and implicit operands.
- assert((isImpReg || Op.isRegMask() || MCID->isVariadic() ||
- OpNo < MCID->getNumOperands() || isDebugOp) &&
+ assert((MCID->isVariadic() || OpNo < MCID->getNumOperands() ||
+ Op.isValidExcessOperand()) &&
"Trying to add an operand to a machine instr that is already done!");
-#endif
MachineRegisterInfo *MRI = getRegInfo();
@@ -300,10 +273,7 @@ void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) {
}
}
-/// RemoveOperand - Erase an operand from an instruction, leaving it with one
-/// fewer operand than it started with.
-///
-void MachineInstr::RemoveOperand(unsigned OpNo) {
+void MachineInstr::removeOperand(unsigned OpNo) {
assert(OpNo < getNumOperands() && "Invalid operand number");
untieRegOperand(OpNo);
@@ -1401,11 +1371,10 @@ bool MachineInstr::isDereferenceableInvariantLoad(AAResults *AA) const {
continue;
// A load from a constant PseudoSourceValue is invariant.
- if (const PseudoSourceValue *PSV = MMO->getPseudoValue())
+ if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
if (PSV->isConstant(&MFI))
continue;
-
- if (const Value *V = MMO->getValue()) {
+ } else if (const Value *V = MMO->getValue()) {
// If we have an AliasAnalysis, ask it whether the memory is constant.
if (AA &&
AA->pointsToConstantMemory(
@@ -1904,7 +1873,7 @@ bool MachineInstr::addRegisterKilled(Register IncomingReg,
unsigned OpIdx = DeadOps.back();
if (getOperand(OpIdx).isImplicit() &&
(!isInlineAsm() || findInlineAsmFlagIdx(OpIdx) < 0))
- RemoveOperand(OpIdx);
+ removeOperand(OpIdx);
else
getOperand(OpIdx).setIsKill(false);
DeadOps.pop_back();
@@ -1969,7 +1938,7 @@ bool MachineInstr::addRegisterDead(Register Reg,
unsigned OpIdx = DeadOps.back();
if (getOperand(OpIdx).isImplicit() &&
(!isInlineAsm() || findInlineAsmFlagIdx(OpIdx) < 0))
- RemoveOperand(OpIdx);
+ removeOperand(OpIdx);
else
getOperand(OpIdx).setIsDead(false);
DeadOps.pop_back();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp
index 759cff179790..2f1d7b976264 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp
@@ -16,7 +16,8 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
#include <utility>
using namespace llvm;
@@ -109,7 +110,7 @@ bool FinalizeMachineBundles::runOnMachineFunction(MachineFunction &MF) {
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI,
MachineBasicBlock::instr_iterator LastMI) {
for (auto MII = FirstMI; MII != LastMI; ++MII)
- if (MII->getDebugLoc().get())
+ if (MII->getDebugLoc())
return MII->getDebugLoc();
return DebugLoc();
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp
index 500cf8e0b79b..00d75f8231c7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp
@@ -240,7 +240,7 @@ namespace {
void ExitScopeIfDone(
MachineDomTreeNode *Node,
DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
- DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap);
+ const DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap);
void HoistOutOfLoop(MachineDomTreeNode *HeaderN);
@@ -696,19 +696,16 @@ void MachineLICMBase::ExitScope(MachineBasicBlock *MBB) {
/// destroy ancestors which are now done.
void MachineLICMBase::ExitScopeIfDone(MachineDomTreeNode *Node,
DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
- DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
+ const DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
if (OpenChildren[Node])
return;
- // Pop scope.
- ExitScope(Node->getBlock());
-
- // Now traverse upwards to pop ancestors whose offsprings are all done.
- while (MachineDomTreeNode *Parent = ParentMap[Node]) {
- unsigned Left = --OpenChildren[Parent];
- if (Left != 0)
+ for (;;) {
+ ExitScope(Node->getBlock());
+ // Now traverse upwards to pop ancestors whose offspring are all done.
+ MachineDomTreeNode *Parent = ParentMap.lookup(Node);
+ if (!Parent || --OpenChildren[Parent] != 0)
break;
- ExitScope(Parent->getBlock());
Node = Parent;
}
}
@@ -999,6 +996,9 @@ bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) {
if (I.isConvergent())
return false;
+ if (!TII->shouldHoist(I, CurLoop))
+ return false;
+
return true;
}
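
// The shouldHoist hook queried above defaults to returning true in
// TargetInstrInfo; a target wanting to veto hoisting would override it
// along these illustrative lines (the predicate is hypothetical):
//
//   bool MyTargetInstrInfo::shouldHoist(const MachineInstr &MI,
//                                       const MachineLoop *FromLoop) const {
//     return !isExpensiveToRematerialize(MI);
//   }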
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp
index 9b96bc5e5e7f..5cbded4b9264 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -17,13 +17,12 @@
#include "llvm/Analysis/LoopInfoImpl.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp
index fdcc8472f1c2..0e8335d4974d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineLoopUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -64,7 +63,11 @@ MachineBasicBlock *llvm::PeelSingleBlockLoop(LoopPeelDirection Direction,
if (Use.getParent()->getParent() != Loop)
Uses.push_back(&Use);
for (auto *Use : Uses) {
- MRI.constrainRegClass(R, MRI.getRegClass(Use->getReg()));
+ const TargetRegisterClass *ConstrainRegClass =
+ MRI.constrainRegClass(R, MRI.getRegClass(Use->getReg()));
+ assert(ConstrainRegClass &&
+ "Expected a valid constrained register class!");
+ (void)ConstrainRegClass;
Use->setReg(R);
}
}
@@ -90,25 +93,24 @@ MachineBasicBlock *llvm::PeelSingleBlockLoop(LoopPeelDirection Direction,
if (Remaps.count(R))
R = Remaps[R];
OrigPhi.getOperand(InitRegIdx).setReg(R);
- MI.RemoveOperand(LoopRegIdx + 1);
- MI.RemoveOperand(LoopRegIdx + 0);
+ MI.removeOperand(LoopRegIdx + 1);
+ MI.removeOperand(LoopRegIdx + 0);
} else {
// When peeling back, the initial value is the loop-carried value from
// the original loop.
Register LoopReg = OrigPhi.getOperand(LoopRegIdx).getReg();
MI.getOperand(LoopRegIdx).setReg(LoopReg);
- MI.RemoveOperand(InitRegIdx + 1);
- MI.RemoveOperand(InitRegIdx + 0);
+ MI.removeOperand(InitRegIdx + 1);
+ MI.removeOperand(InitRegIdx + 0);
}
}
DebugLoc DL;
if (Direction == LPD_Front) {
- Preheader->replaceSuccessor(Loop, NewBB);
+ Preheader->ReplaceUsesOfBlockWith(Loop, NewBB);
NewBB->addSuccessor(Loop);
Loop->replacePhiUsesWith(Preheader, NewBB);
- if (TII->removeBranch(*Preheader) > 0)
- TII->insertBranch(*Preheader, NewBB, nullptr, {}, DL);
+ Preheader->updateTerminator(Loop);
TII->removeBranch(*NewBB);
TII->insertBranch(*NewBB, Loop, nullptr, {}, DL);
} else {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp
index 31d4fc7d02bf..23d55a5df9f5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -7,27 +7,18 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DiagnosticInfo.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/Value.h"
-#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
@@ -40,174 +31,24 @@
using namespace llvm;
using namespace llvm::dwarf;
+static cl::opt<bool>
+ DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden,
+ cl::desc("Disable debug info printing"));
+
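// Usage sketch (assumed invocation): passing -disable-debug-info-print to a
// tool such as llc leaves the IR untouched but clears DbgInfoAvailable
// below, so the backend behaves as if the module had no compile units.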
// Out of line virtual method.
MachineModuleInfoImpl::~MachineModuleInfoImpl() = default;
-namespace llvm {
-
-class MMIAddrLabelMapCallbackPtr final : CallbackVH {
- MMIAddrLabelMap *Map = nullptr;
-
-public:
- MMIAddrLabelMapCallbackPtr() = default;
- MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V) {}
-
- void setPtr(BasicBlock *BB) {
- ValueHandleBase::operator=(BB);
- }
-
- void setMap(MMIAddrLabelMap *map) { Map = map; }
-
- void deleted() override;
- void allUsesReplacedWith(Value *V2) override;
-};
-
-class MMIAddrLabelMap {
- MCContext &Context;
- struct AddrLabelSymEntry {
- /// The symbols for the label.
- TinyPtrVector<MCSymbol *> Symbols;
-
- Function *Fn; // The containing function of the BasicBlock.
- unsigned Index; // The index in BBCallbacks for the BasicBlock.
- };
-
- DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry> AddrLabelSymbols;
-
- /// Callbacks for the BasicBlock's that we have entries for. We use this so
- /// we get notified if a block is deleted or RAUWd.
- std::vector<MMIAddrLabelMapCallbackPtr> BBCallbacks;
-
- /// This is a per-function list of symbols whose corresponding BasicBlock got
- /// deleted. These symbols need to be emitted at some point in the file, so
- /// AsmPrinter emits them after the function body.
- DenseMap<AssertingVH<Function>, std::vector<MCSymbol*>>
- DeletedAddrLabelsNeedingEmission;
-
-public:
- MMIAddrLabelMap(MCContext &context) : Context(context) {}
-
- ~MMIAddrLabelMap() {
- assert(DeletedAddrLabelsNeedingEmission.empty() &&
- "Some labels for deleted blocks never got emitted");
- }
-
- ArrayRef<MCSymbol *> getAddrLabelSymbolToEmit(BasicBlock *BB);
-
- void takeDeletedSymbolsForFunction(Function *F,
- std::vector<MCSymbol*> &Result);
-
- void UpdateForDeletedBlock(BasicBlock *BB);
- void UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New);
-};
-
-} // end namespace llvm
-
-ArrayRef<MCSymbol *> MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
- assert(BB->hasAddressTaken() &&
- "Shouldn't get label for block without address taken");
- AddrLabelSymEntry &Entry = AddrLabelSymbols[BB];
-
- // If we already had an entry for this block, just return it.
- if (!Entry.Symbols.empty()) {
- assert(BB->getParent() == Entry.Fn && "Parent changed");
- return Entry.Symbols;
- }
-
- // Otherwise, this is a new entry, create a new symbol for it and add an
- // entry to BBCallbacks so we can be notified if the BB is deleted or RAUWd.
- BBCallbacks.emplace_back(BB);
- BBCallbacks.back().setMap(this);
- Entry.Index = BBCallbacks.size() - 1;
- Entry.Fn = BB->getParent();
- MCSymbol *Sym = BB->hasAddressTaken() ? Context.createNamedTempSymbol()
- : Context.createTempSymbol();
- Entry.Symbols.push_back(Sym);
- return Entry.Symbols;
-}
-
-/// If we have any deleted symbols for F, return them.
-void MMIAddrLabelMap::
-takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) {
- DenseMap<AssertingVH<Function>, std::vector<MCSymbol*>>::iterator I =
- DeletedAddrLabelsNeedingEmission.find(F);
-
- // If there are no entries for the function, just return.
- if (I == DeletedAddrLabelsNeedingEmission.end()) return;
-
- // Otherwise, take the list.
- std::swap(Result, I->second);
- DeletedAddrLabelsNeedingEmission.erase(I);
-}
-
-void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) {
- // If the block got deleted, there is no need for the symbol. If the symbol
- // was already emitted, we can just forget about it, otherwise we need to
- // queue it up for later emission when the function is output.
- AddrLabelSymEntry Entry = std::move(AddrLabelSymbols[BB]);
- AddrLabelSymbols.erase(BB);
- assert(!Entry.Symbols.empty() && "Didn't have a symbol, why a callback?");
- BBCallbacks[Entry.Index] = nullptr; // Clear the callback.
-
- assert((BB->getParent() == nullptr || BB->getParent() == Entry.Fn) &&
- "Block/parent mismatch");
-
- for (MCSymbol *Sym : Entry.Symbols) {
- if (Sym->isDefined())
- return;
-
- // If the block is not yet defined, we need to emit it at the end of the
- // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list
- // for the containing Function. Since the block is being deleted, its
- // parent may already be removed, we have to get the function from 'Entry'.
- DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym);
- }
-}
-
-void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) {
- // Get the entry for the RAUW'd block and remove it from our map.
- AddrLabelSymEntry OldEntry = std::move(AddrLabelSymbols[Old]);
- AddrLabelSymbols.erase(Old);
- assert(!OldEntry.Symbols.empty() && "Didn't have a symbol, why a callback?");
-
- AddrLabelSymEntry &NewEntry = AddrLabelSymbols[New];
-
- // If New is not address taken, just move our symbol over to it.
- if (NewEntry.Symbols.empty()) {
- BBCallbacks[OldEntry.Index].setPtr(New); // Update the callback.
- NewEntry = std::move(OldEntry); // Set New's entry.
- return;
- }
-
- BBCallbacks[OldEntry.Index] = nullptr; // Update the callback.
-
- // Otherwise, we need to add the old symbols to the new block's set.
- llvm::append_range(NewEntry.Symbols, OldEntry.Symbols);
-}
-
-void MMIAddrLabelMapCallbackPtr::deleted() {
- Map->UpdateForDeletedBlock(cast<BasicBlock>(getValPtr()));
-}
-
-void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
- Map->UpdateForRAUWBlock(cast<BasicBlock>(getValPtr()), cast<BasicBlock>(V2));
-}
-
void MachineModuleInfo::initialize() {
ObjFileMMI = nullptr;
CurCallSite = 0;
NextFnNum = 0;
- UsesMSVCFloatingPoint = UsesMorestackAddr = false;
- HasSplitStack = HasNosplitStack = false;
- AddrLabelSymbols = nullptr;
+ UsesMSVCFloatingPoint = false;
+ DbgInfoAvailable = false;
}
void MachineModuleInfo::finalize() {
Personalities.clear();
- delete AddrLabelSymbols;
- AddrLabelSymbols = nullptr;
-
Context.reset();
// We don't clear the ExternalContext.
@@ -219,16 +60,11 @@ MachineModuleInfo::MachineModuleInfo(MachineModuleInfo &&MMI)
: TM(std::move(MMI.TM)),
Context(MMI.TM.getTargetTriple(), MMI.TM.getMCAsmInfo(),
MMI.TM.getMCRegisterInfo(), MMI.TM.getMCSubtargetInfo(), nullptr,
- nullptr, false),
+ &MMI.TM.Options.MCOptions, false),
MachineFunctions(std::move(MMI.MachineFunctions)) {
Context.setObjectFileInfo(MMI.TM.getObjFileLowering());
ObjFileMMI = MMI.ObjFileMMI;
CurCallSite = MMI.CurCallSite;
- UsesMSVCFloatingPoint = MMI.UsesMSVCFloatingPoint;
- UsesMorestackAddr = MMI.UsesMorestackAddr;
- HasSplitStack = MMI.HasSplitStack;
- HasNosplitStack = MMI.HasNosplitStack;
- AddrLabelSymbols = MMI.AddrLabelSymbols;
ExternalContext = MMI.ExternalContext;
TheModule = MMI.TheModule;
}
@@ -236,7 +72,7 @@ MachineModuleInfo::MachineModuleInfo(MachineModuleInfo &&MMI)
MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM)
: TM(*TM), Context(TM->getTargetTriple(), TM->getMCAsmInfo(),
TM->getMCRegisterInfo(), TM->getMCSubtargetInfo(),
- nullptr, nullptr, false) {
+ nullptr, &TM->Options.MCOptions, false) {
Context.setObjectFileInfo(TM->getObjFileLowering());
initialize();
}
@@ -245,7 +81,7 @@ MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM,
MCContext *ExtContext)
: TM(*TM), Context(TM->getTargetTriple(), TM->getMCAsmInfo(),
TM->getMCRegisterInfo(), TM->getMCSubtargetInfo(),
- nullptr, nullptr, false),
+ nullptr, &TM->Options.MCOptions, false),
ExternalContext(ExtContext) {
Context.setObjectFileInfo(TM->getObjFileLowering());
initialize();
@@ -253,25 +89,6 @@ MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM,
MachineModuleInfo::~MachineModuleInfo() { finalize(); }
-//===- Address of Block Management ----------------------------------------===//
-
-ArrayRef<MCSymbol *>
-MachineModuleInfo::getAddrLabelSymbolToEmit(const BasicBlock *BB) {
- // Lazily create AddrLabelSymbols.
- if (!AddrLabelSymbols)
- AddrLabelSymbols = new MMIAddrLabelMap(getContext());
- return AddrLabelSymbols->getAddrLabelSymbolToEmit(const_cast<BasicBlock*>(BB));
-}
-
-void MachineModuleInfo::
-takeDeletedSymbolsForFunction(const Function *F,
- std::vector<MCSymbol*> &Result) {
- // If no blocks have had their addresses taken, we're done.
- if (!AddrLabelSymbols) return;
- return AddrLabelSymbols->
- takeDeletedSymbolsForFunction(const_cast<Function*>(F), Result);
-}
-
/// \name Exception Handling
/// \{
@@ -318,6 +135,13 @@ void MachineModuleInfo::deleteMachineFunctionFor(Function &F) {
LastResult = nullptr;
}
+void MachineModuleInfo::insertFunction(const Function &F,
+ std::unique_ptr<MachineFunction> &&MF) {
+ auto I = MachineFunctions.insert(std::make_pair(&F, std::move(MF)));
+ assert(I.second && "machine function already mapped");
+ (void)I;
+}
+
namespace {
/// This pass frees the MachineFunction object associated with a Function.
@@ -409,7 +233,8 @@ bool MachineModuleInfoWrapperPass::doInitialization(Module &M) {
Ctx.diagnose(
DiagnosticInfoSrcMgr(SMD, M.getName(), IsInlineAsm, LocCookie));
});
- MMI.DbgInfoAvailable = !M.debug_compile_units().empty();
+ MMI.DbgInfoAvailable = !DisableDebugInfoPrinting &&
+ !M.debug_compile_units().empty();
return false;
}
@@ -424,6 +249,7 @@ MachineModuleInfo MachineModuleAnalysis::run(Module &M,
ModuleAnalysisManager &) {
MachineModuleInfo MMI(TM);
MMI.TheModule = &M;
- MMI.DbgInfoAvailable = !M.debug_compile_units().empty();
+ MMI.DbgInfoAvailable = !DisableDebugInfoPrinting &&
+ !M.debug_compile_units().empty();
return MMI;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
index 680dbe54ffaf..46ad1de78c46 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
@@ -14,9 +14,7 @@
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/Loads.h"
-#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/MIRFormatter.h"
-#include "llvm/CodeGen/MIRPrinter.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
index 5347a7b0d890..631768ec986c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
@@ -53,10 +53,8 @@ void MachineOptimizationRemarkEmitter::emit(
LLVMContext &Ctx = MF.getFunction().getContext();
// Only emit it if its hotness meets the threshold.
- if (OptDiag.getHotness().getValueOr(0) <
- Ctx.getDiagnosticsHotnessThreshold()) {
+ if (OptDiag.getHotness().value_or(0) < Ctx.getDiagnosticsHotnessThreshold())
return;
- }
Ctx.diagnose(OptDiag);
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
index 7783b5e0d3cc..5da68abc8f6a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -59,6 +59,8 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/Passes.h"
@@ -82,9 +84,17 @@ using namespace llvm;
using namespace ore;
using namespace outliner;
+// Statistics for outlined functions.
STATISTIC(NumOutlined, "Number of candidates outlined");
STATISTIC(FunctionsCreated, "Number of functions created");
+// Statistics for instruction mapping.
+STATISTIC(NumLegalInUnsignedVec, "Number of legal instrs in unsigned vector");
+STATISTIC(NumIllegalInUnsignedVec,
+ "Number of illegal instrs in unsigned vector");
+STATISTIC(NumInvisible, "Number of invisible instrs in unsigned vector");
+STATISTIC(UnsignedVecSize, "Size of unsigned vector");
+
// Set to true if the user wants the outliner to run on linkonceodr linkage
// functions. This is false by default because the linker can dedupe linkonceodr
// functions. Since the outliner is confined to a single module (modulo LTO),
@@ -188,6 +198,8 @@ struct InstructionMapper {
assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
"Tried to assign DenseMap tombstone or empty key to instruction.");
+ // Statistics.
+ ++NumLegalInUnsignedVec;
return MINumber;
}
@@ -215,6 +227,8 @@ struct InstructionMapper {
InstrListForMBB.push_back(It);
UnsignedVecForMBB.push_back(IllegalInstrNumber);
IllegalInstrNumber--;
+ // Statistics.
+ ++NumIllegalInUnsignedVec;
assert(LegalInstrNumber < IllegalInstrNumber &&
"Instruction mapping overflow!");
@@ -293,6 +307,7 @@ struct InstructionMapper {
case InstrType::Invisible:
// Normally this is set by mapTo(Blah)Unsigned, but we just want to
// skip this instruction. So, unset the flag here.
+ ++NumInvisible;
AddedIllegalLastTime = false;
break;
}
@@ -623,6 +638,15 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
TII.mergeOutliningCandidateAttributes(*F, OF.Candidates);
+ // Set uwtable, so we generate eh_frame.
+ UWTableKind UW = std::accumulate(
+ OF.Candidates.cbegin(), OF.Candidates.cend(), UWTableKind::None,
+ [](UWTableKind K, const outliner::Candidate &C) {
+ return std::max(K, C.getMF()->getFunction().getUWTableKind());
+ });
+ if (UW != UWTableKind::None)
+ F->setUWTableKind(UW);
+
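// UWTableKind is ordered (None < Sync < Async), so the std::accumulate max
// above picks the strongest unwind-table requirement among all candidates;
// the outlined function is only given an uwtable kind when at least one
// candidate required one.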
BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
IRBuilder<> Builder(EntryBB);
Builder.CreateRetVoid();
@@ -641,17 +665,20 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
++I) {
if (I->isDebugInstr())
continue;
- MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
+
+ // Don't keep debug information for outlined instructions.
+ auto DL = DebugLoc();
if (I->isCFIInstruction()) {
- unsigned CFIIndex = NewMI->getOperand(0).getCFIIndex();
+ unsigned CFIIndex = I->getOperand(0).getCFIIndex();
MCCFIInstruction CFI = Instrs[CFIIndex];
- (void)MF.addFrameInst(CFI);
+ BuildMI(MBB, MBB.end(), DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(MF.addFrameInst(CFI));
+ } else {
+ MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
+ NewMI->dropMemRefs(MF);
+ NewMI->setDebugLoc(DL);
+ MBB.insert(MBB.end(), NewMI);
}
- NewMI->dropMemRefs(MF);
-
- // Don't keep debug information for outlined instructions.
- NewMI->setDebugLoc(DebugLoc());
- MBB.insert(MBB.end(), NewMI);
}
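// Note on the CFI path above: a cloned CFI_INSTRUCTION would still hold an
// index into the *original* function's frame-instruction table, so the
// instruction is rebuilt instead; MF.addFrameInst registers the
// MCCFIInstruction with the outlined function and returns its new index.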
// Set normal properties for a late MachineFunction.
@@ -831,9 +858,10 @@ bool MachineOutliner::outline(Module &M,
MBB.erase(std::next(StartIt), std::next(EndIt));
// Keep track of what we removed by marking them all as -1.
- std::for_each(Mapper.UnsignedVec.begin() + C.getStartIdx(),
- Mapper.UnsignedVec.begin() + C.getEndIdx() + 1,
- [](unsigned &I) { I = static_cast<unsigned>(-1); });
+ for (unsigned &I :
+ llvm::make_range(Mapper.UnsignedVec.begin() + C.getStartIdx(),
+ Mapper.UnsignedVec.begin() + C.getEndIdx() + 1))
+ I = static_cast<unsigned>(-1);
OutlinedSomething = true;
// Statistics.
@@ -896,6 +924,9 @@ void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M,
// MBB is suitable for outlining. Map it to a list of unsigneds.
Mapper.convertToUnsignedVec(MBB, *TII);
}
+
+ // Statistics.
+ UnsignedVecSize = Mapper.UnsignedVec.size();
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
index 762395542b40..8d500398f55e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -29,6 +29,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/MachinePipeliner.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
@@ -43,6 +44,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/LiveIntervals.h"
@@ -55,7 +57,6 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/MachinePipeliner.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ModuloSchedule.h"
#include "llvm/CodeGen/RegisterPressure.h"
@@ -66,7 +67,6 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Attributes.h"
-#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -109,7 +109,6 @@ STATISTIC(NumFailLargeMaxStage, "Pipeliner abort due to too many stages");
/// A command line option to turn software pipelining on or off.
static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
- cl::ZeroOrMore,
cl::desc("Enable Software Pipelining"));
/// A command line option to enable SWP at -Os.
@@ -147,8 +146,8 @@ static cl::opt<int> SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1));
#endif
static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
- cl::ReallyHidden, cl::init(false),
- cl::ZeroOrMore, cl::desc("Ignore RecMII"));
+ cl::ReallyHidden,
+ cl::desc("Ignore RecMII"));
static cl::opt<bool> SwpShowResMask("pipeliner-show-mask", cl::Hidden,
cl::init(false));
@@ -169,10 +168,9 @@ static cl::opt<bool> ExperimentalCodeGen(
namespace llvm {
// A command line option to enable the CopyToPhi DAG mutation.
-cl::opt<bool>
- SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
- cl::init(true), cl::ZeroOrMore,
- cl::desc("Enable CopyToPhi DAG Mutation"));
+cl::opt<bool> SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
+ cl::init(true),
+ cl::desc("Enable CopyToPhi DAG Mutation"));
} // end namespace llvm
@@ -255,6 +253,7 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
<< "Failed to pipeline loop";
});
+ LI.LoopPipelinerInfo.reset();
return Changed;
}
@@ -262,6 +261,7 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
Changed = swingModuloScheduler(L);
+ LI.LoopPipelinerInfo.reset();
return Changed;
}
@@ -354,7 +354,8 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
LI.LoopInductionVar = nullptr;
LI.LoopCompare = nullptr;
- if (!TII->analyzeLoopForPipelining(L.getTopBlock())) {
+ LI.LoopPipelinerInfo = TII->analyzeLoopForPipelining(L.getTopBlock());
+ if (!LI.LoopPipelinerInfo) {
LLVM_DEBUG(dbgs() << "Unable to analyzeLoop, can NOT pipeline Loop\n");
NumFailLoop++;
ORE->emit([&]() {
@@ -419,7 +420,7 @@ bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo,
- II_setByPragma);
+ II_setByPragma, LI.LoopPipelinerInfo.get());
MachineBasicBlock *MBB = L.getHeader();
// The kernel should not include any terminator instructions. These
@@ -513,7 +514,7 @@ void SwingSchedulerDAG::schedule() {
// Don't pipeline large loops.
if (SwpMaxMii != -1 && (int)MII > SwpMaxMii) {
LLVM_DEBUG(dbgs() << "MII > " << SwpMaxMii
- << ", we don't pipleline large loops\n");
+ << ", we don't pipeline large loops\n");
NumFailLargeMaxMII++;
Pass.ORE->emit([&]() {
return MachineOptimizationRemarkAnalysis(
@@ -1297,8 +1298,7 @@ bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
for (auto W : AdjK[V]) {
if (W < S)
continue;
- if (B[W].count(SV) == 0)
- B[W].insert(SV);
+ B[W].insert(SV);
}
}
Stack.pop_back();
@@ -1422,7 +1422,7 @@ void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) {
/// We ignore the back-edge recurrence in order to avoid unbounded recursion
/// in the calculation of the ASAP, ALAP, etc functions.
static bool ignoreDependence(const SDep &D, bool isPred) {
- if (D.isArtificial())
+ if (D.isArtificial() || D.getSUnit()->isBoundaryNode())
return true;
return D.getKind() == SDep::Anti && isPred;
}
@@ -1471,6 +1471,8 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
SUnit *SU = &SUnits[I];
for (const SDep &S : SU->Succs) {
SUnit *succ = S.getSUnit();
+ if (succ->isBoundaryNode())
+ continue;
if (S.getLatency() == 0)
zeroLatencyHeight =
std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1);
@@ -1575,7 +1577,9 @@ static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
return Path.contains(Cur);
bool FoundPath = false;
for (auto &SI : Cur->Succs)
- FoundPath |= computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
+ if (!ignoreDependence(SI, false))
+ FoundPath |=
+ computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
for (auto &PI : Cur->Preds)
if (PI.getKind() == SDep::Anti)
FoundPath |=
@@ -1663,7 +1667,7 @@ void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
LLVM_DEBUG(
dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
<< TRI->getRegPressureSetName(RPDelta.Excess.getPSet())
- << ":" << RPDelta.Excess.getUnitInc());
+ << ":" << RPDelta.Excess.getUnitInc() << "\n");
NS.setExceedPressure(SU);
break;
}
@@ -1718,7 +1722,7 @@ void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
}
/// Add the nodes that do not belong to a recurrence set into groups
-/// based upon connected componenets.
+/// based upon connected components.
void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
SetVector<SUnit *> NodesAdded;
SmallPtrSet<SUnit *, 8> Visited;
@@ -1788,7 +1792,8 @@ void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
NodesAdded.insert(SU);
for (auto &SI : SU->Succs) {
SUnit *Successor = SI.getSUnit();
- if (!SI.isArtificial() && NodesAdded.count(Successor) == 0)
+ if (!SI.isArtificial() && !Successor->isBoundaryNode() &&
+ NodesAdded.count(Successor) == 0)
addConnectedNodes(Successor, NewSet, NodesAdded);
}
for (auto &PI : SU->Preds) {
@@ -1803,8 +1808,7 @@ void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
static bool isIntersect(SmallSetVector<SUnit *, 8> &Set1, const NodeSet &Set2,
SmallSetVector<SUnit *, 8> &Result) {
Result.clear();
- for (unsigned i = 0, e = Set1.size(); i != e; ++i) {
- SUnit *SU = Set1[i];
+ for (SUnit *SU : Set1) {
if (Set2.count(SU) != 0)
Result.insert(SU);
}
@@ -2080,6 +2084,11 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
});
} while (++NI != NE && scheduleFound);
+ // If a schedule is found, ensure non-pipelined instructions are in stage 0
+ if (scheduleFound)
+ scheduleFound =
+ Schedule.normalizeNonPipelinedInstructions(this, LoopPipelinerInfo);
+
// If a schedule is found, check if it is a valid schedule too.
if (scheduleFound)
scheduleFound = Schedule.isValidSchedule(this);
@@ -2263,7 +2272,7 @@ MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) {
bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
bool isSucc) {
if ((Dep.getKind() != SDep::Order && Dep.getKind() != SDep::Output) ||
- Dep.isArtificial())
+ Dep.isArtificial() || Dep.getSUnit()->isBoundaryNode())
return false;
if (!SwpPruneLoopCarried)
@@ -2430,7 +2439,7 @@ int SMSchedule::latestCycleInChain(const SDep &Dep) {
while (!Worklist.empty()) {
const SDep &Cur = Worklist.pop_back_val();
SUnit *SuccSU = Cur.getSUnit();
- if (Visited.count(SuccSU))
+ if (Visited.count(SuccSU) || SuccSU->isBoundaryNode())
continue;
std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
if (it == InstrToCycle.end())
@@ -2697,21 +2706,91 @@ bool SMSchedule::isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD,
return false;
}
+/// Determine transitive dependences of unpipelineable instructions.
+SmallSet<SUnit *, 8> SMSchedule::computeUnpipelineableNodes(
+ SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI) {
+ SmallSet<SUnit *, 8> DoNotPipeline;
+ SmallVector<SUnit *, 8> Worklist;
+
+ for (auto &SU : SSD->SUnits)
+ if (SU.isInstr() && PLI->shouldIgnoreForPipelining(SU.getInstr()))
+ Worklist.push_back(&SU);
+
+ while (!Worklist.empty()) {
+ auto SU = Worklist.pop_back_val();
+ if (DoNotPipeline.count(SU))
+ continue;
+ LLVM_DEBUG(dbgs() << "Do not pipeline SU(" << SU->NodeNum << ")\n");
+ DoNotPipeline.insert(SU);
+ for (auto &Dep : SU->Preds)
+ Worklist.push_back(Dep.getSUnit());
+ if (SU->getInstr()->isPHI())
+ for (auto &Dep : SU->Succs)
+ if (Dep.getKind() == SDep::Anti)
+ Worklist.push_back(Dep.getSUnit());
+ }
+ return DoNotPipeline;
+}
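
// In effect this is a reverse reachability walk: the worklist is seeded with
// every instruction the target flags via shouldIgnoreForPipelining, then all
// transitive predecessors are pulled in, plus the anti-dependence successors
// of PHIs, so a PHI and its loop-carried update end up unpipelined together.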
+
+// Determine all instructions upon which any unpipelineable instruction depends
+// and ensure that they are in stage 0. If unable to do so, return false.
+bool SMSchedule::normalizeNonPipelinedInstructions(
+ SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI) {
+ SmallSet<SUnit *, 8> DNP = computeUnpipelineableNodes(SSD, PLI);
+
+ int NewLastCycle = INT_MIN;
+ for (SUnit &SU : SSD->SUnits) {
+ if (!SU.isInstr())
+ continue;
+ if (!DNP.contains(&SU) || stageScheduled(&SU) == 0) {
+ NewLastCycle = std::max(NewLastCycle, InstrToCycle[&SU]);
+ continue;
+ }
+
+ // Put the non-pipelined instruction as early as possible in the schedule.
+ int NewCycle = getFirstCycle();
+ for (auto &Dep : SU.Preds)
+ NewCycle = std::max(InstrToCycle[Dep.getSUnit()], NewCycle);
+
+ int OldCycle = InstrToCycle[&SU];
+ if (OldCycle != NewCycle) {
+ InstrToCycle[&SU] = NewCycle;
+ auto &OldS = getInstructions(OldCycle);
+ llvm::erase_value(OldS, &SU);
+ getInstructions(NewCycle).emplace_back(&SU);
+ LLVM_DEBUG(dbgs() << "SU(" << SU.NodeNum
+ << ") is not pipelined; moving from cycle " << OldCycle
+ << " to " << NewCycle << " Instr:" << *SU.getInstr());
+ }
+ NewLastCycle = std::max(NewLastCycle, NewCycle);
+ }
+ LastCycle = NewLastCycle;
+ return true;
+}
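
// Worked illustration, assuming II = 2 and getFirstCycle() = 0: an
// unpipelineable SU left at cycle 5 would sit in stage 2, so the loop above
// reassigns it to the latest of cycle 0 and its predecessors' cycles; if
// every predecessor sits in cycle 0, the SU lands in cycle 0 (stage 0), and
// LastCycle is then recomputed from the updated assignments.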
+
// Check if the generated schedule is valid. This function checks if
// an instruction that uses a physical register is scheduled in a
// different stage than the definition. The pipeliner does not handle
// physical register values that may cross a basic block boundary.
+// Furthermore, if a physical def/use pair is assigned to the same
+// cycle, orderDependence does not guarantee def/use ordering, so that
+// case should be considered invalid. (The test checks for both
+// earlier and same-cycle use to be more robust.)
bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) {
for (SUnit &SU : SSD->SUnits) {
if (!SU.hasPhysRegDefs)
continue;
int StageDef = stageScheduled(&SU);
+ int CycleDef = InstrToCycle[&SU];
assert(StageDef != -1 && "Instruction should have been scheduled.");
for (auto &SI : SU.Succs)
- if (SI.isAssignedRegDep())
- if (Register::isPhysicalRegister(SI.getReg()))
+ if (SI.isAssignedRegDep() && !SI.getSUnit()->isBoundaryNode())
+ if (Register::isPhysicalRegister(SI.getReg())) {
if (stageScheduled(SI.getSUnit()) != StageDef)
return false;
+ if (InstrToCycle[SI.getSUnit()] <= CycleDef)
+ return false;
+ }
}
return true;
}
@@ -2998,7 +3077,7 @@ bool ResourceManager::canReserveResources(const MCInstrDesc *MID) const {
if (!SCDesc->isValid()) {
LLVM_DEBUG({
dbgs() << "No valid Schedule Class Desc for schedClass!\n";
- dbgs() << "isPseduo:" << MID->isPseudo() << "\n";
+ dbgs() << "isPseudo:" << MID->isPseudo() << "\n";
});
return true;
}
@@ -3038,7 +3117,7 @@ void ResourceManager::reserveResources(const MCInstrDesc *MID) {
if (!SCDesc->isValid()) {
LLVM_DEBUG({
dbgs() << "No valid Schedule Class Desc for schedClass!\n";
- dbgs() << "isPseduo:" << MID->isPseudo() << "\n";
+ dbgs() << "isPseudo:" << MID->isPseudo() << "\n";
});
return;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index 1a4ad53ddf81..511bb80052c2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -12,7 +12,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/ADT/iterator_range.h"
-#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -651,3 +650,18 @@ bool MachineRegisterInfo::isReservedRegUnit(unsigned Unit) const {
}
return false;
}
+
+bool MachineRegisterInfo::isArgumentRegister(const MachineFunction &MF,
+ MCRegister Reg) const {
+ return getTargetRegisterInfo()->isArgumentRegister(MF, Reg);
+}
+
+bool MachineRegisterInfo::isFixedRegister(const MachineFunction &MF,
+ MCRegister Reg) const {
+ return getTargetRegisterInfo()->isFixedRegister(MF, Reg);
+}
+
+bool MachineRegisterInfo::isGeneralPurposeRegister(const MachineFunction &MF,
+ MCRegister Reg) const {
+ return getTargetRegisterInfo()->isGeneralPurposeRegister(MF, Reg);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAContext.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAContext.cpp
index 8db893535daf..01cea85ecc7c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAContext.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAContext.cpp
@@ -14,7 +14,9 @@
#include "llvm/CodeGen/MachineSSAContext.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
index b043d4c1b0c1..4e00a211713e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -32,7 +32,6 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
@@ -752,7 +751,7 @@ void ScheduleDAGMI::moveInstruction(
}
bool ScheduleDAGMI::checkSchedLimit() {
-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
CurrentTop = CurrentBottom;
return false;
@@ -920,12 +919,10 @@ void ScheduleDAGMI::placeDebugValues() {
MachineBasicBlock::iterator OrigPrevMI = P.second;
if (&*RegionBegin == DbgValue)
++RegionBegin;
- BB->splice(++OrigPrevMI, BB, DbgValue);
- if (OrigPrevMI == std::prev(RegionEnd))
+ BB->splice(std::next(OrigPrevMI), BB, DbgValue);
+ if (RegionEnd != BB->end() && OrigPrevMI == &*RegionEnd)
RegionEnd = DbgValue;
}
- DbgValues.clear();
- FirstDbgValue = nullptr;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -2008,7 +2005,7 @@ void SchedBoundary::reset() {
ReservedCycles.clear();
ReservedCyclesIndex.clear();
ResourceGroupSubUnitMasks.clear();
-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
// Track the maximum number of stall cycles that could arise either from the
// latency of a DAG edge or the number of cycles that a processor resource is
// reserved (SchedBoundary::ReservedCycles).
@@ -2196,7 +2193,7 @@ bool SchedBoundary::checkHazard(SUnit *SU) {
unsigned NRCycle, InstanceIdx;
std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(SC, ResIdx, Cycles);
if (NRCycle > CurrCycle) {
-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
MaxObservedStall = std::max(Cycles, MaxObservedStall);
#endif
LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
@@ -2263,7 +2260,7 @@ void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
unsigned Idx) {
assert(SU->getInstr() && "Scheduled SUnit must have instr");
-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
// ReadyCycle was been bumped up to the CurrCycle when this node was
// scheduled, but CurrCycle may have been eagerly advanced immediately after
// scheduling, so may now be greater than ReadyCycle.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
index bc03776bde19..006ba9273dfb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
@@ -16,19 +16,20 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineCycleAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -95,18 +96,18 @@ static cl::opt<unsigned> SinkLoadBlocksThreshold(
cl::init(20), cl::Hidden);
static cl::opt<bool>
-SinkInstsIntoLoop("sink-insts-to-avoid-spills",
- cl::desc("Sink instructions into loops to avoid "
- "register spills"),
- cl::init(false), cl::Hidden);
-
-static cl::opt<unsigned> SinkIntoLoopLimit(
- "machine-sink-loop-limit",
- cl::desc("The maximum number of instructions considered for loop sinking."),
+ SinkInstsIntoCycle("sink-insts-to-avoid-spills",
+ cl::desc("Sink instructions into cycles to avoid "
+ "register spills"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<unsigned> SinkIntoCycleLimit(
+ "machine-sink-cycle-limit",
+ cl::desc("The maximum number of instructions considered for cycle sinking."),
cl::init(50), cl::Hidden);
STATISTIC(NumSunk, "Number of machine instructions sunk");
-STATISTIC(NumLoopSunk, "Number of machine instructions sunk into a loop");
+STATISTIC(NumCycleSunk, "Number of machine instructions sunk into a cycle");
STATISTIC(NumSplit, "Number of critical edges split");
STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumPostRACopySink, "Number of copies sunk after RA");
@@ -119,7 +120,7 @@ namespace {
MachineRegisterInfo *MRI; // Machine register information
MachineDominatorTree *DT; // Machine dominator tree
MachinePostDominatorTree *PDT; // Machine post dominator tree
- MachineLoopInfo *LI;
+ MachineCycleInfo *CI;
MachineBlockFrequencyInfo *MBFI;
const MachineBranchProbabilityInfo *MBPI;
AliasAnalysis *AA;
@@ -180,8 +181,9 @@ namespace {
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachinePostDominatorTree>();
- AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineCycleInfoWrapperPass>();
AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addPreserved<MachineCycleInfoWrapperPass>();
AU.addPreserved<MachineLoopInfo>();
if (UseBlockFreqInfo)
AU.addRequired<MachineBlockFrequencyInfo>();
@@ -232,9 +234,9 @@ namespace {
MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
bool &BreakPHIEdge, AllSuccsCache &AllSuccessors);
- void FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB,
- SmallVectorImpl<MachineInstr *> &Candidates);
- bool SinkIntoLoop(MachineLoop *L, MachineInstr &I);
+ void FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB,
+ SmallVectorImpl<MachineInstr *> &Candidates);
+ bool SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I);
bool isProfitableToSinkTo(Register Reg, MachineInstr &MI,
MachineBasicBlock *MBB,
@@ -261,7 +263,7 @@ INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE,
"Machine code sinking", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineSinking, DEBUG_TYPE,
"Machine code sinking", false, false)
@@ -378,26 +380,27 @@ static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
return false;
}
-void MachineSinking::FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB,
+void MachineSinking::FindCycleSinkCandidates(
+ MachineCycle *Cycle, MachineBasicBlock *BB,
SmallVectorImpl<MachineInstr *> &Candidates) {
for (auto &MI : *BB) {
- LLVM_DEBUG(dbgs() << "LoopSink: Analysing candidate: " << MI);
+ LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI);
if (!TII->shouldSink(MI)) {
- LLVM_DEBUG(dbgs() << "LoopSink: Instruction not a candidate for this "
+ LLVM_DEBUG(dbgs() << "CycleSink: Instruction not a candidate for this "
"target\n");
continue;
}
- if (!L->isLoopInvariant(MI)) {
- LLVM_DEBUG(dbgs() << "LoopSink: Instruction is not loop invariant\n");
+ if (!isCycleInvariant(Cycle, MI)) {
+ LLVM_DEBUG(dbgs() << "CycleSink: Instruction is not cycle invariant\n");
continue;
}
bool DontMoveAcrossStore = true;
if (!MI.isSafeToMove(AA, DontMoveAcrossStore)) {
- LLVM_DEBUG(dbgs() << "LoopSink: Instruction not safe to move.\n");
+ LLVM_DEBUG(dbgs() << "CycleSink: Instruction not safe to move.\n");
continue;
}
if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) {
- LLVM_DEBUG(dbgs() << "LoopSink: Dont sink GOT or constant pool loads\n");
+      LLVM_DEBUG(dbgs() << "CycleSink: Don't sink GOT or constant pool loads\n");
continue;
}
if (MI.isConvergent())
@@ -409,7 +412,7 @@ void MachineSinking::FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *B
if (!MRI->hasOneDef(MO.getReg()))
continue;
- LLVM_DEBUG(dbgs() << "LoopSink: Instruction added as candidate.\n");
+ LLVM_DEBUG(dbgs() << "CycleSink: Instruction added as candidate.\n");
Candidates.push_back(&MI);
}
}
@@ -425,22 +428,12 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
DT = &getAnalysis<MachineDominatorTree>();
PDT = &getAnalysis<MachinePostDominatorTree>();
- LI = &getAnalysis<MachineLoopInfo>();
+ CI = &getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr;
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
RegClassInfo.runOnMachineFunction(MF);
- // MachineSink currently uses MachineLoopInfo, which only recognizes natural
- // loops. As such, we could sink instructions into irreducible cycles, which
- // would be non-profitable.
- // WARNING: The current implementation of hasStoreBetween() is incorrect for
- // sinking into irreducible cycles (PR53990), this bailout is currently
- // necessary for correctness, not just profitability.
- ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
- if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *LI))
- return false;
-
bool EverMadeChange = false;
while (true) {
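The deleted bailout existed because MachineLoopInfo models only natural loops, i.e. cycles whose single header block dominates the whole cycle; an irreducible region has no such header, so the old pass could not see it and had to give up on the entire function (per the removed comment, the hasStoreBetween() issue tracked as PR53990 made that a correctness requirement, not just a profitability one). MachineCycleInfo reports irreducible cycles too, so the blanket bailout can go. A throwaway sketch, not LLVM code, of why the smallest irreducible region has no natural-loop header: neither block of the cycle dominates the other.

#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

using CFG = std::map<std::string, std::vector<std::string>>;

// Mid dominates Target iff Target becomes unreachable from Entry once Mid
// is removed from the graph (naive, but adequate for a toy example).
static bool dominates(const CFG &G, const std::string &Entry,
                      const std::string &Mid, const std::string &Target) {
  std::set<std::string> Seen{Mid};
  std::vector<std::string> Work{Entry};
  while (!Work.empty()) {
    std::string N = Work.back();
    Work.pop_back();
    if (!Seen.insert(N).second)
      continue;
    if (N == Target)
      return false;
    for (const auto &S : G.at(N))
      Work.push_back(S);
  }
  return true;
}

int main() {
  // A branches to both B and C; B and C branch to each other and to D.
  // {B, C} is a cycle with two entries, hence irreducible.
  CFG G = {{"A", {"B", "C"}}, {"B", {"C", "D"}}, {"C", {"B", "D"}}, {"D", {}}};
  std::cout << dominates(G, "A", "B", "C") << '\n'; // 0: B is not a header
  std::cout << dominates(G, "A", "C", "B") << '\n'; // 0: C is not a header
}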
@@ -473,32 +466,33 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
EverMadeChange = true;
}
- if (SinkInstsIntoLoop) {
- SmallVector<MachineLoop *, 8> Loops(LI->begin(), LI->end());
- for (auto *L : Loops) {
- MachineBasicBlock *Preheader = LI->findLoopPreheader(L);
+ if (SinkInstsIntoCycle) {
+ SmallVector<MachineCycle *, 8> Cycles(CI->toplevel_begin(),
+ CI->toplevel_end());
+ for (auto *Cycle : Cycles) {
+ MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
if (!Preheader) {
- LLVM_DEBUG(dbgs() << "LoopSink: Can't find preheader\n");
+ LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n");
continue;
}
SmallVector<MachineInstr *, 8> Candidates;
- FindLoopSinkCandidates(L, Preheader, Candidates);
+ FindCycleSinkCandidates(Cycle, Preheader, Candidates);
// Walk the candidates in reverse order so that we start with the use
// of a def-use chain, if there is any.
// TODO: Sort the candidates using a cost-model.
unsigned i = 0;
for (MachineInstr *I : llvm::reverse(Candidates)) {
- if (i++ == SinkIntoLoopLimit) {
- LLVM_DEBUG(dbgs() << "LoopSink: Limit reached of instructions to "
+ if (i++ == SinkIntoCycleLimit) {
+        LLVM_DEBUG(dbgs() << "CycleSink: Reached the limit of instructions to "
"be analysed.");
break;
}
- if (!SinkIntoLoop(L, *I))
+ if (!SinkIntoCycle(Cycle, *I))
break;
EverMadeChange = true;
- ++NumLoopSunk;
+ ++NumCycleSunk;
}
}
}
@@ -520,12 +514,12 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
// Don't bother sinking code out of unreachable blocks. In addition to being
// unprofitable, it can also lead to infinite looping, because in an
- // unreachable loop there may be nowhere to stop.
+ // unreachable cycle there may be nowhere to stop.
if (!DT->isReachableFromEntry(&MBB)) return false;
bool MadeChange = false;
- // Cache all successors, sorted by frequency info and loop depth.
+ // Cache all successors, sorted by frequency info and cycle depth.
AllSuccsCache AllSuccessors;
// Walk the basic block bottom-up. Remember if we saw a store.
@@ -644,13 +638,16 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB))
return false;
- // Avoid breaking back edge. From == To means backedge for single BB loop.
+ // Avoid breaking back edge. From == To means backedge for single BB cycle.
if (!SplitEdges || FromBB == ToBB)
return false;
- // Check for backedges of more "complex" loops.
- if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) &&
- LI->isLoopHeader(ToBB))
+ MachineCycle *FromCycle = CI->getCycle(FromBB);
+ MachineCycle *ToCycle = CI->getCycle(ToBB);
+
+ // Check for backedges of more "complex" cycles.
+ if (FromCycle == ToCycle && FromCycle &&
+ (!FromCycle->isReducible() || FromCycle->getHeader() == ToBB))
return false;
// It's not always legal to break critical edges and sink the computation
@@ -753,9 +750,9 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
if (!PDT->dominates(SuccToSinkTo, MBB))
return true;
- // It is profitable to sink an instruction from a deeper loop to a shallower
- // loop, even if the latter post-dominates the former (PR21115).
- if (LI->getLoopDepth(MBB) > LI->getLoopDepth(SuccToSinkTo))
+ // It is profitable to sink an instruction from a deeper cycle to a shallower
+ // cycle, even if the latter post-dominates the former (PR21115).
+ if (CI->getCycleDepth(MBB) > CI->getCycleDepth(SuccToSinkTo))
return true;
// Check if only use in post dominated block is PHI instruction.
@@ -776,11 +773,11 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors))
return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors);
- MachineLoop *ML = LI->getLoopFor(MBB);
+ MachineCycle *MCycle = CI->getCycle(MBB);
- // If the instruction is not inside a loop, it is not profitable to sink MI to
+ // If the instruction is not inside a cycle, it is not profitable to sink MI to
// a post dominate block SuccToSinkTo.
- if (!ML)
+ if (!MCycle)
return false;
auto isRegisterPressureSetExceedLimit = [&](const TargetRegisterClass *RC) {
@@ -798,7 +795,7 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
return false;
};
- // If this instruction is inside a loop and sinking this instruction can make
+ // If this instruction is inside a cycle and sinking this instruction can make
// the live ranges of more registers shorter, it is still profitable.
for (const MachineOperand &MO : MI.operands()) {
// Ignore non-register operands.
@@ -826,14 +823,17 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
return false;
} else {
MachineInstr *DefMI = MRI->getVRegDef(Reg);
- // DefMI is defined outside of loop. There should be no live range
- // impact for this operand. Defination outside of loop means:
- // 1: defination is outside of loop.
- // 2: defination is in this loop, but it is a PHI in the loop header.
- if (LI->getLoopFor(DefMI->getParent()) != ML ||
- (DefMI->isPHI() && LI->isLoopHeader(DefMI->getParent())))
+ if (!DefMI)
+ continue;
+ MachineCycle *Cycle = CI->getCycle(DefMI->getParent());
+ // DefMI is defined outside of the cycle. There should be no live range
+ // impact for this operand. A definition outside of the cycle means:
+ // 1: the definition is outside of the cycle.
+ // 2: the definition is in this cycle, but it is a PHI in the cycle header.
+ if (Cycle != MCycle || (DefMI->isPHI() && Cycle && Cycle->isReducible() &&
+ Cycle->getHeader() == DefMI->getParent()))
continue;
- // The DefMI is defined inside the loop.
+ // The DefMI is defined inside the cycle.
// If sinking this operand makes some register pressure set exceed limit,
// it is not profitable.
if (isRegisterPressureSetExceedLimit(MRI->getRegClass(Reg))) {
@@ -843,8 +843,8 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
}
}
- // If MI is in loop and all its operands are alive across the whole loop or if
- // no operand sinking make register pressure set exceed limit, it is
+ // If MI is in a cycle and all its operands are live across the whole cycle,
+ // or if no operand sinking makes a register pressure set exceed the limit, it is
// profitable to sink MI.
return true;
}
@@ -876,14 +876,14 @@ MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
AllSuccs.push_back(DTChild->getBlock());
}
- // Sort Successors according to their loop depth or block frequency info.
+ // Sort Successors according to their cycle depth or block frequency info.
llvm::stable_sort(
AllSuccs, [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0;
return HasBlockFreq ? LHSFreq < RHSFreq
- : LI->getLoopDepth(L) < LI->getLoopDepth(R);
+ : CI->getCycleDepth(L) < CI->getCycleDepth(R);
});
auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs));
@@ -898,7 +898,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
AllSuccsCache &AllSuccessors) {
assert (MBB && "Invalid MachineBasicBlock!");
- // Loop over all the operands of the specified instruction. If there is
+ // loop over all the operands of the specified instruction. If there is
// anything we can't handle, bail out.
// SuccToSinkTo - This is the successor to sink this instruction to, once we
@@ -945,7 +945,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
// Otherwise, we should look at all the successors and decide which one
// we should sink to. If we have reliable block frequency information
// (frequency != 0) available, give successors with smaller frequencies
- // higher priority, otherwise prioritize smaller loop depths.
+ // higher priority, otherwise prioritize smaller cycle depths.
for (MachineBasicBlock *SuccBlock :
GetAllSortedSuccessors(MI, MBB, AllSuccessors)) {
bool LocalUse = false;
@@ -968,7 +968,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
}
// It is not possible to sink an instruction into its own block. This can
- // happen with loops.
+ // happen with cycles.
if (MBB == SuccToSinkTo)
return nullptr;
@@ -1093,8 +1093,7 @@ using MIRegs = std::pair<MachineInstr *, SmallVector<unsigned, 2>>;
/// Sink an instruction and its associated debug instructions.
static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
MachineBasicBlock::iterator InsertPos,
- SmallVectorImpl<MIRegs> &DbgValuesToSink) {
-
+ ArrayRef<MIRegs> DbgValuesToSink) {
// If we cannot find a location to use (merge with), then we erase the debug
// location to prevent debug-info driven tools from potentially reporting
// wrong location information.
@@ -1113,7 +1112,7 @@ static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
// DBG_VALUE location as 'undef', indicating that any earlier variable
// location should be terminated as we've optimised away the value at this
// point.
- for (auto DbgValueToSink : DbgValuesToSink) {
+ for (const auto &DbgValueToSink : DbgValuesToSink) {
MachineInstr *DbgMI = DbgValueToSink.first;
MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(DbgMI);
SuccToSinkTo.insert(InsertPos, NewDbgMI);
@@ -1178,7 +1177,7 @@ bool MachineSinking::hasStoreBetween(MachineBasicBlock *From,
// If this BB is too big or the block number in straight line between From
// and To is too big, stop searching to save compile time.
- if (BB->size() > SinkLoadInstsPerBlockThreshold ||
+ if (BB->sizeWithoutDebugLargerThan(SinkLoadInstsPerBlockThreshold) ||
HandledDomBlocks.size() > SinkLoadBlocksThreshold) {
for (auto *DomBB : HandledDomBlocks) {
if (DomBB != BB && DT->dominates(DomBB, BB))
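The switch from `BB->size()` to `BB->sizeWithoutDebugLargerThan()` in this hunk is a classic debug-invariance fix: DBG_VALUE instructions count toward size(), so a function built with -g could cross the threshold and be optimized differently from the identical code built without debug info. A sketch of what such a predicate amounts to, assuming only a range of instructions exposing isDebugInstr() (not the actual MachineBasicBlock implementation):

template <typename Range>
bool sizeWithoutDebugLargerThan(const Range &Instrs, unsigned Limit) {
  unsigned N = 0;
  for (const auto &I : Instrs)
    if (!I.isDebugInstr() && ++N > Limit)
      return true; // early exit: no need to count the whole block
  return false;
}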
@@ -1223,69 +1222,78 @@ bool MachineSinking::hasStoreBetween(MachineBasicBlock *From,
return HasAliasedStore;
}
-/// Sink instructions into loops if profitable. This especially tries to prevent
-/// register spills caused by register pressure if there is little to no
-/// overhead moving instructions into loops.
-bool MachineSinking::SinkIntoLoop(MachineLoop *L, MachineInstr &I) {
- LLVM_DEBUG(dbgs() << "LoopSink: Finding sink block for: " << I);
- MachineBasicBlock *Preheader = L->getLoopPreheader();
- assert(Preheader && "Loop sink needs a preheader block");
+/// Sink instructions into cycles if profitable. This especially tries to
+/// prevent register spills caused by register pressure if there is little to no
+/// overhead moving instructions into cycles.
+bool MachineSinking::SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I) {
+ LLVM_DEBUG(dbgs() << "CycleSink: Finding sink block for: " << I);
+ MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
+ assert(Preheader && "Cycle sink needs a preheader block");
MachineBasicBlock *SinkBlock = nullptr;
bool CanSink = true;
const MachineOperand &MO = I.getOperand(0);
for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
- LLVM_DEBUG(dbgs() << "LoopSink: Analysing use: " << MI);
- if (!L->contains(&MI)) {
- LLVM_DEBUG(dbgs() << "LoopSink: Use not in loop, can't sink.\n");
+ LLVM_DEBUG(dbgs() << "CycleSink: Analysing use: " << MI);
+ if (!Cycle->contains(MI.getParent())) {
+ LLVM_DEBUG(dbgs() << "CycleSink: Use not in cycle, can't sink.\n");
CanSink = false;
break;
}
// FIXME: Come up with a proper cost model that estimates whether sinking
- // the instruction (and thus possibly executing it on every loop
+ // the instruction (and thus possibly executing it on every cycle
// iteration) is more expensive than a register.
// For now assumes that copies are cheap and thus almost always worth it.
if (!MI.isCopy()) {
- LLVM_DEBUG(dbgs() << "LoopSink: Use is not a copy\n");
+ LLVM_DEBUG(dbgs() << "CycleSink: Use is not a copy\n");
CanSink = false;
break;
}
if (!SinkBlock) {
SinkBlock = MI.getParent();
- LLVM_DEBUG(dbgs() << "LoopSink: Setting sink block to: "
+ LLVM_DEBUG(dbgs() << "CycleSink: Setting sink block to: "
<< printMBBReference(*SinkBlock) << "\n");
continue;
}
SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent());
if (!SinkBlock) {
- LLVM_DEBUG(dbgs() << "LoopSink: Can't find nearest dominator\n");
+ LLVM_DEBUG(dbgs() << "CycleSink: Can't find nearest dominator\n");
CanSink = false;
break;
}
- LLVM_DEBUG(dbgs() << "LoopSink: Setting nearest common dom block: " <<
+ LLVM_DEBUG(dbgs() << "CycleSink: Setting nearest common dom block: " <<
printMBBReference(*SinkBlock) << "\n");
}
if (!CanSink) {
- LLVM_DEBUG(dbgs() << "LoopSink: Can't sink instruction.\n");
+ LLVM_DEBUG(dbgs() << "CycleSink: Can't sink instruction.\n");
return false;
}
if (!SinkBlock) {
- LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, can't find sink block.\n");
+ LLVM_DEBUG(dbgs() << "CycleSink: Not sinking, can't find sink block.\n");
return false;
}
if (SinkBlock == Preheader) {
- LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, sink block is the preheader\n");
+ LLVM_DEBUG(
+ dbgs() << "CycleSink: Not sinking, sink block is the preheader\n");
return false;
}
- if (SinkBlock->size() > SinkLoadInstsPerBlockThreshold) {
- LLVM_DEBUG(dbgs() << "LoopSink: Not Sinking, block too large to analyse.\n");
+ if (SinkBlock->sizeWithoutDebugLargerThan(SinkLoadInstsPerBlockThreshold)) {
+ LLVM_DEBUG(
+        dbgs() << "CycleSink: Not sinking, block too large to analyse.");
return false;
}
- LLVM_DEBUG(dbgs() << "LoopSink: Sinking instruction!\n");
- SinkBlock->splice(SinkBlock->getFirstNonPHI(), Preheader, I);
+ LLVM_DEBUG(dbgs() << "CycleSink: Sinking instruction!\n");
+ SinkBlock->splice(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()), Preheader,
+ I);
+
+  // Conservatively clear any kill flags on uses of the sunk instruction.
+ for (MachineOperand &MO : I.operands()) {
+ if (MO.isReg() && MO.readsReg())
+ RegsToClearKillFlags.insert(MO.getReg());
+ }
// The instruction is moved from its basic block, so do not retain the
// debug information.
@@ -1294,6 +1302,45 @@ bool MachineSinking::SinkIntoLoop(MachineLoop *L, MachineInstr &I) {
return true;
}
+/// Return true if a target defined block prologue instruction interferes
+/// with a sink candidate.
+static bool blockPrologueInterferes(MachineBasicBlock *BB,
+ MachineBasicBlock::iterator End,
+ MachineInstr &MI,
+ const TargetRegisterInfo *TRI,
+ const TargetInstrInfo *TII,
+ const MachineRegisterInfo *MRI) {
+ if (BB->begin() == End)
+ return false; // no prologue
+ for (MachineBasicBlock::iterator PI = BB->getFirstNonPHI(); PI != End; ++PI) {
+ // Only check target defined prologue instructions
+ if (!TII->isBasicBlockPrologue(*PI))
+ continue;
+ for (auto &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isUse()) {
+ if (Register::isPhysicalRegister(Reg) &&
+ (TII->isIgnorableUse(MO) || (MRI && MRI->isConstantPhysReg(Reg))))
+ continue;
+ if (PI->modifiesRegister(Reg, TRI))
+ return true;
+ } else {
+ if (PI->readsRegister(Reg, TRI))
+ return true;
+ // Check for interference with non-dead defs
+ auto *DefOp = PI->findRegisterDefOperand(Reg, false, true, TRI);
+ if (DefOp && !DefOp->isDead())
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
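blockPrologueInterferes() is consulted by both sinkers touched later in this patch, always in the same shape: compute the post-PHI/label insertion point first, then veto the sink if any target-defined prologue instruction before that point writes a register the candidate reads, or reads (or non-dead-defines) a register the candidate writes. The pattern, condensed from the two call sites below (no additional API is assumed):

MachineBasicBlock::iterator InsertPos =
    SuccBB->SkipPHIsAndLabels(SuccBB->begin());
if (blockPrologueInterferes(SuccBB, InsertPos, MI, TRI, TII, MRI))
  return false;                       // leave MI where it is
performSink(MI, *SuccBB, InsertPos, DbgValuesToSink);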
/// SinkInstruction - Determine whether it is safe to sink the specified machine
/// instruction out of its current block into a successor.
bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
@@ -1368,9 +1415,11 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
TryBreak = true;
}
- // Don't sink instructions into a loop.
- if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) {
- LLVM_DEBUG(dbgs() << " *** NOTE: Loop header found\n");
+ // Don't sink instructions into a cycle.
+ if (!TryBreak && CI->getCycle(SuccToSinkTo) &&
+ (!CI->getCycle(SuccToSinkTo)->isReducible() ||
+ CI->getCycle(SuccToSinkTo)->getHeader() == SuccToSinkTo)) {
+ LLVM_DEBUG(dbgs() << " *** NOTE: cycle header found\n");
TryBreak = true;
}
@@ -1405,9 +1454,12 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
}
// Determine where to insert into. Skip phi nodes.
- MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin();
- while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI())
- ++InsertPos;
+ MachineBasicBlock::iterator InsertPos =
+ SuccToSinkTo->SkipPHIsAndLabels(SuccToSinkTo->begin());
+ if (blockPrologueInterferes(SuccToSinkTo, InsertPos, MI, TRI, TII, MRI)) {
+ LLVM_DEBUG(dbgs() << " *** Not sinking: prologue interference\n");
+ return false;
+ }
// Collect debug users of any vreg that this inst defines.
SmallVector<MIRegs, 4> DbgUsersToSink;
@@ -1696,14 +1748,6 @@ static bool hasRegisterDependency(MachineInstr *MI,
return HasRegDependency;
}
-static SmallSet<MCRegister, 4> getRegUnits(MCRegister Reg,
- const TargetRegisterInfo *TRI) {
- SmallSet<MCRegister, 4> RegUnits;
- for (auto RI = MCRegUnitIterator(Reg, TRI); RI.isValid(); ++RI)
- RegUnits.insert(*RI);
- return RegUnits;
-}
-
bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
MachineFunction &MF,
const TargetRegisterInfo *TRI,
@@ -1749,14 +1793,15 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
}
// Record debug use of each reg unit.
- SmallSet<MCRegister, 4> RegUnits = getRegUnits(MO.getReg(), TRI);
- for (MCRegister Reg : RegUnits)
- MIUnits[Reg].push_back(MO.getReg());
+ for (auto RI = MCRegUnitIterator(MO.getReg(), TRI); RI.isValid();
+ ++RI)
+ MIUnits[*RI].push_back(MO.getReg());
}
}
if (IsValid) {
- for (auto RegOps : MIUnits)
- SeenDbgInstrs[RegOps.first].push_back({&MI, RegOps.second});
+ for (auto &RegOps : MIUnits)
+ SeenDbgInstrs[RegOps.first].emplace_back(&MI,
+ std::move(RegOps.second));
}
continue;
}
@@ -1803,22 +1848,29 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
if (!MO.isReg() || !MO.isDef())
continue;
- SmallSet<MCRegister, 4> Units = getRegUnits(MO.getReg(), TRI);
- for (MCRegister Reg : Units) {
- for (auto MIRegs : SeenDbgInstrs.lookup(Reg)) {
+ for (auto RI = MCRegUnitIterator(MO.getReg(), TRI); RI.isValid(); ++RI) {
+ for (const auto &MIRegs : SeenDbgInstrs.lookup(*RI)) {
auto &Regs = DbgValsToSinkMap[MIRegs.first];
for (unsigned Reg : MIRegs.second)
Regs.push_back(Reg);
}
}
}
- SmallVector<MIRegs, 4> DbgValsToSink(DbgValsToSinkMap.begin(),
- DbgValsToSinkMap.end());
+ auto DbgValsToSink = DbgValsToSinkMap.takeVector();
+
+ LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccBB);
+
+ MachineBasicBlock::iterator InsertPos =
+ SuccBB->SkipPHIsAndLabels(SuccBB->begin());
+ if (blockPrologueInterferes(SuccBB, InsertPos, MI, TRI, TII, nullptr)) {
+ LLVM_DEBUG(
+ dbgs() << " *** Not sinking: prologue interference\n");
+ continue;
+ }
// Clear the kill flag if SrcReg is killed between MI and the end of the
// block.
clearKillFlags(&MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
- MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI();
performSink(MI, *SuccBB, InsertPos, DbgValsToSink);
updateLiveIn(&MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp
index 0803c2b8b85a..a85dbf1de1ee 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp
@@ -12,29 +12,30 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineStableHash.h"
-#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Analysis/Loads.h"
-#include "llvm/Analysis/MemoryLocation.h"
-#include "llvm/CodeGen/MIRFormatter.h"
-#include "llvm/CodeGen/MIRPrinter.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/ADT/ilist_iterator.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineInstrBundleIterator.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/StableHashing.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/IRPrintingPasses.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/ModuleSlotTracker.h"
-#include "llvm/MC/MCDwarf.h"
-#include "llvm/Target/TargetIntrinsicInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Alignment.h"
+#include "llvm/Support/ErrorHandling.h"
#define DEBUG_TYPE "machine-stable-hash"
@@ -64,7 +65,10 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
case MachineOperand::MO_Register:
if (Register::isVirtualRegister(MO.getReg())) {
const MachineRegisterInfo &MRI = MO.getParent()->getMF()->getRegInfo();
- return MRI.getVRegDef(MO.getReg())->getOpcode();
+ SmallVector<unsigned> DefOpcodes;
+ for (auto &Def : MRI.def_instructions(MO.getReg()))
+ DefOpcodes.push_back(Def.getOpcode());
+ return hash_combine_range(DefOpcodes.begin(), DefOpcodes.end());
}
// Register operands don't have target flags.
@@ -192,3 +196,21 @@ stable_hash llvm::stableHashValue(const MachineInstr &MI, bool HashVRegs,
return stable_hash_combine_range(HashComponents.begin(),
HashComponents.end());
}
+
+stable_hash llvm::stableHashValue(const MachineBasicBlock &MBB) {
+ SmallVector<stable_hash> HashComponents;
+ // TODO: Hash more stuff like block alignment and branch probabilities.
+ for (auto &MI : MBB)
+ HashComponents.push_back(stableHashValue(MI));
+ return stable_hash_combine_range(HashComponents.begin(),
+ HashComponents.end());
+}
+
+stable_hash llvm::stableHashValue(const MachineFunction &MF) {
+ SmallVector<stable_hash> HashComponents;
+ // TODO: Hash lots more stuff like function alignment and stack objects.
+ for (auto &MBB : MF)
+ HashComponents.push_back(stableHashValue(MBB));
+ return stable_hash_combine_range(HashComponents.begin(),
+ HashComponents.end());
+}
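The two new overloads simply fold instruction-level stable hashes upward, and the MO_Register change earlier in this hunk makes a virtual-register operand hash over all of its defining opcodes instead of assuming a single SSA definition. A hypothetical use, with the signatures exactly as defined in this patch:

#include "llvm/CodeGen/MachineStableHash.h"
using namespace llvm;

// Equal hashes do not prove the functions are identical, but a mismatch is
// a cheap, order-stable signal that the generated machine code changed.
bool sameMachineCode(const MachineFunction &A, const MachineFunction &B) {
  return stableHashValue(A) == stableHashValue(B);
}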
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineStripDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineStripDebug.cpp
index 86cf4999d4b0..6128248a028e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineStripDebug.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineStripDebug.cpp
@@ -10,10 +10,10 @@
/// tests can be debugified without affecting the output MIR.
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/DebugInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/Debugify.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
index c9d3e473062b..db04f2bcc095 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -32,10 +32,10 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/EHPersonalities.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveIntervalCalc.h"
#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRangeCalc.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -48,6 +48,8 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/RegisterBank.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -55,12 +57,14 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCTargetOptions.h"
@@ -95,6 +99,7 @@ namespace {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
const MachineRegisterInfo *MRI;
+ const RegisterBankInfo *RBI;
unsigned foundErrors;
@@ -370,6 +375,7 @@ unsigned MachineVerifier::verify(const MachineFunction &MF) {
TM = &MF.getTarget();
TII = MF.getSubtarget().getInstrInfo();
TRI = MF.getSubtarget().getRegisterInfo();
+ RBI = MF.getSubtarget().getRegBankInfo();
MRI = &MF.getRegInfo();
const bool isFunctionFailedISel = MF.getProperties().hasProperty(
@@ -442,7 +448,7 @@ unsigned MachineVerifier::verify(const MachineFunction &MF) {
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
const MachineOperand &Op = MI.getOperand(I);
if (Op.getParent() != &MI) {
- // Make sure to use correct addOperand / RemoveOperand / ChangeTo
+ // Make sure to use correct addOperand / removeOperand / ChangeTo
// functions when replacing operands of a MachineInstr.
report("Instruction has operand with wrong parent set", &MI);
}
@@ -1000,17 +1006,23 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
break;
}
- if (MRI->getRegBankOrNull(Src) != MRI->getRegBankOrNull(Dst)) {
- report(
- Twine(OpcName, " source and destination register banks must match"),
- MI);
+ const RegisterBank *SrcRB = RBI->getRegBank(Src, *MRI, *TRI);
+ const RegisterBank *DstRB = RBI->getRegBank(Dst, *MRI, *TRI);
+
+ // Allow only the source bank to be set.
+ if ((SrcRB && DstRB && SrcRB != DstRB) || (DstRB && !SrcRB)) {
+ report(Twine(OpcName, " cannot change register bank"), MI);
break;
}
- if (MRI->getRegClassOrNull(Src) != MRI->getRegClassOrNull(Dst))
+ // Don't allow a class change. Do allow member class->regbank.
+ const TargetRegisterClass *DstRC = MRI->getRegClassOrNull(Dst);
+ if (DstRC && DstRC != MRI->getRegClassOrNull(Src)) {
report(
Twine(OpcName, " source and destination register classes must match"),
MI);
+ break;
+ }
break;
}
@@ -1072,6 +1084,18 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
if (ValTy.getSizeInBytes() < MMO.getSize())
report("store memory size cannot exceed value size", MI);
}
+
+ const AtomicOrdering Order = MMO.getSuccessOrdering();
+ if (Opc == TargetOpcode::G_STORE) {
+ if (Order == AtomicOrdering::Acquire ||
+ Order == AtomicOrdering::AcquireRelease)
+ report("atomic store cannot use acquire ordering", MI);
+
+ } else {
+ if (Order == AtomicOrdering::Release ||
+ Order == AtomicOrdering::AcquireRelease)
+ report("atomic load cannot use release ordering", MI);
+ }
}
break;
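The new ordering check encodes the same rule the C++ memory model imposes: a store publishes and therefore cannot be an acquire operation, and a load observes and therefore cannot be a release. The standard-library analogue of the two cases the verifier now rejects (both are undefined behavior in ISO C++):

#include <atomic>

std::atomic<int> X{0};

void rejected_orderings() {
  X.store(1, std::memory_order_acquire);   // UB: a store may not acquire
  (void)X.load(std::memory_order_release); // UB: a load may not release
}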
@@ -1628,6 +1652,43 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
verifyAllRegOpsScalar(*MI, *MRI);
break;
}
+ case TargetOpcode::G_IS_FPCLASS: {
+ LLT DestTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT DestEltTy = DestTy.getScalarType();
+ if (!DestEltTy.isScalar()) {
+ report("Destination must be a scalar or vector of scalars", MI);
+ break;
+ }
+ LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+ LLT SrcEltTy = SrcTy.getScalarType();
+ if (!SrcEltTy.isScalar()) {
+ report("Source must be a scalar or vector of scalars", MI);
+ break;
+ }
+ if (!verifyVectorElementMatch(DestTy, SrcTy, MI))
+ break;
+ const MachineOperand &TestMO = MI->getOperand(2);
+ if (!TestMO.isImm()) {
+ report("floating-point class set (operand 2) must be an immediate", MI);
+ break;
+ }
+ int64_t Test = TestMO.getImm();
+ if (Test < 0 || Test > fcAllFlags) {
+ report("Incorrect floating-point class set (operand 2)", MI);
+ break;
+ }
+ const MachineOperand &SemanticsMO = MI->getOperand(3);
+ if (!SemanticsMO.isImm()) {
+ report("floating-point semantics (operand 3) must be an immediate", MI);
+ break;
+ }
+ int64_t Semantics = SemanticsMO.getImm();
+ if (Semantics < 0 || Semantics > APFloat::S_MaxSemantics) {
+ report("Incorrect floating-point semantics (operand 3)", MI);
+ break;
+ }
+ break;
+ }
default:
break;
}
@@ -1912,6 +1973,10 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
if (MRI->tracksLiveness() && !MI->isDebugInstr())
checkLiveness(MO, MONum);
+ if (MO->isDef() && MO->isUndef() && !MO->getSubReg() &&
+ MO->getReg().isVirtual()) // TODO: Apply to physregs too
+ report("Undef virtual register def operands require a subregister", MO, MONum);
+
// Verify the consistency of tied operands.
if (MO->isTied()) {
unsigned OtherIdx = MI->findTiedOperandIdx(MONum);
@@ -2148,6 +2213,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
}
break;
+ case MachineOperand::MO_CFIIndex:
+ if (MO->getCFIIndex() >= MF->getFrameInstructions().size())
+ report("CFI instruction has invalid index", MO, MONum);
+ break;
+
default:
break;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp
index b0760322064c..fa5df68b8abc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp
@@ -12,11 +12,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MacroFusion.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/CommandLine.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
index f91a9d2c3a32..3245d9649be1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -11,6 +11,7 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCContext.h"
@@ -157,7 +158,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
SmallVector<MachineBasicBlock *, 4> EpilogBBs;
// Generate the epilog instructions to complete the pipeline.
- generateEpilog(MaxStageCount, KernelBB, VRMap, EpilogBBs, PrologBBs);
+ generateEpilog(MaxStageCount, KernelBB, BB, VRMap, EpilogBBs, PrologBBs);
// We need this step because the register allocation doesn't handle some
// situations well, so we insert copies to help out.
@@ -239,11 +240,9 @@ void ModuloScheduleExpander::generateProlog(unsigned LastStage,
/// Generate the pipeline epilog code. The epilog code finishes the iterations
/// that were started in either the prolog or the kernel. We create a basic
/// block for each stage that needs to complete.
-void ModuloScheduleExpander::generateEpilog(unsigned LastStage,
- MachineBasicBlock *KernelBB,
- ValueMapTy *VRMap,
- MBBVectorTy &EpilogBBs,
- MBBVectorTy &PrologBBs) {
+void ModuloScheduleExpander::generateEpilog(
+ unsigned LastStage, MachineBasicBlock *KernelBB, MachineBasicBlock *OrigBB,
+ ValueMapTy *VRMap, MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs) {
// We need to change the branch from the kernel to the first epilog block, so
// this call to analyze branch uses the kernel rather than the original BB.
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
@@ -313,7 +312,12 @@ void ModuloScheduleExpander::generateEpilog(unsigned LastStage,
// Create a branch to the new epilog from the kernel.
// Remove the original branch and add a new branch to the epilog.
TII->removeBranch(*KernelBB);
- TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
+ assert((OrigBB == TBB || OrigBB == FBB) &&
+ "Unable to determine looping branch direction");
+ if (OrigBB != TBB)
+ TII->insertBranch(*KernelBB, EpilogStart, KernelBB, Cond, DebugLoc());
+ else
+ TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
// Add a branch to the loop exit.
if (EpilogBBs.size() > 0) {
MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
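One note on the epilog-branch fix above: Cond was produced by analyzeBranch for a particular (TBB, FBB) pair, and the old code always emitted insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, ...), silently assuming the loop-back edge sat on the taken side. Passing OrigBB down lets the expander check which side actually looped and swap the two destinations when needed, with the assert documenting that one of the two must be the original block. This is the hunk restated with the reasoning spelled out, not new code:

assert((OrigBB == TBB || OrigBB == FBB) &&
       "Unable to determine looping branch direction");
if (OrigBB != TBB)  // back edge was on the false/fall-through side
  TII->insertBranch(*KernelBB, EpilogStart, KernelBB, Cond, DebugLoc());
else                // back edge was the taken side; keep the old order
  TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());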
@@ -813,8 +817,8 @@ static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
break;
for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
if (MI.getOperand(i + 1).getMBB() == Incoming) {
- MI.RemoveOperand(i + 1);
- MI.RemoveOperand(i);
+ MI.removeOperand(i + 1);
+ MI.removeOperand(i);
break;
}
}
@@ -846,7 +850,7 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
Optional<bool> StaticallyGreater =
LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond);
unsigned numAdded = 0;
- if (!StaticallyGreater.hasValue()) {
+ if (!StaticallyGreater) {
Prolog->addSuccessor(Epilog);
numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
} else if (*StaticallyGreater == false) {
@@ -999,7 +1003,7 @@ MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
}
/// Update the machine instruction with new virtual registers. This
-/// function may change the defintions and/or uses.
+/// function may change the definitions and/or uses.
void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
bool LastDef,
unsigned CurStageNum,
@@ -1159,8 +1163,17 @@ void ModuloScheduleExpander::rewriteScheduledInstr(
if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
ReplaceReg = NewReg;
if (ReplaceReg) {
- MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
- UseOp.setReg(ReplaceReg);
+ const TargetRegisterClass *NRC =
+ MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
+ if (NRC)
+ UseOp.setReg(ReplaceReg);
+ else {
+ Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+ BuildMI(*BB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ SplitReg)
+ .addReg(ReplaceReg);
+ UseOp.setReg(SplitReg);
+ }
}
}
}
@@ -1205,8 +1218,12 @@ void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
MI.eraseFromParent();
Changed = true;
} else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {
- MRI.constrainRegClass(MI.getOperand(1).getReg(),
- MRI.getRegClass(MI.getOperand(0).getReg()));
+ const TargetRegisterClass *ConstrainRegClass =
+ MRI.constrainRegClass(MI.getOperand(1).getReg(),
+ MRI.getRegClass(MI.getOperand(0).getReg()));
+ assert(ConstrainRegClass &&
+ "Expected a valid constrained register class!");
+ (void)ConstrainRegClass;
MRI.replaceRegWith(MI.getOperand(0).getReg(),
MI.getOperand(1).getReg());
if (LIS)
@@ -1404,7 +1421,7 @@ Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
while (DefaultI != Defaults.rend())
LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));
- if (IllegalPhiDefault.hasValue()) {
+ if (IllegalPhiDefault) {
// The consumer optionally consumes LoopProducer in the same iteration
// (because the producer is scheduled at an earlier cycle than the consumer)
// or the initial value. To facilitate this we create an illegal block here
@@ -1414,7 +1431,7 @@ Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
Register R = MRI.createVirtualRegister(RC);
MachineInstr *IllegalPhi =
BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R)
- .addReg(IllegalPhiDefault.getValue())
+ .addReg(*IllegalPhiDefault)
.addMBB(PreheaderBB) // Block choice is arbitrary and has no effect.
.addReg(LoopReg)
.addMBB(BB); // Block choice is arbitrary and has no effect.
@@ -1430,7 +1447,7 @@ Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg,
const TargetRegisterClass *RC) {
// If the init register is not undef, try and find an existing phi.
- if (InitReg.hasValue()) {
+ if (InitReg) {
auto I = Phis.find({LoopReg, InitReg.getValue()});
if (I != Phis.end())
return I->second;
@@ -1446,7 +1463,7 @@ Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg,
auto I = UndefPhis.find(LoopReg);
if (I != UndefPhis.end()) {
Register R = I->second;
- if (!InitReg.hasValue())
+ if (!InitReg)
// Found a phi taking undef as input, and this input is undef so return
// without any more changes.
return R;
@@ -1454,7 +1471,10 @@ Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg,
MachineInstr *MI = MRI.getVRegDef(R);
MI->getOperand(1).setReg(InitReg.getValue());
Phis.insert({{LoopReg, InitReg.getValue()}, R});
- MRI.constrainRegClass(R, MRI.getRegClass(InitReg.getValue()));
+ const TargetRegisterClass *ConstrainRegClass =
+ MRI.constrainRegClass(R, MRI.getRegClass(InitReg.getValue()));
+ assert(ConstrainRegClass && "Expected a valid constrained register class!");
+ (void)ConstrainRegClass;
UndefPhis.erase(I);
return R;
}
@@ -1463,14 +1483,18 @@ Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg,
if (!RC)
RC = MRI.getRegClass(LoopReg);
Register R = MRI.createVirtualRegister(RC);
- if (InitReg.hasValue())
- MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
+ if (InitReg) {
+ const TargetRegisterClass *ConstrainRegClass =
+ MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
+ assert(ConstrainRegClass && "Expected a valid constrained register class!");
+ (void)ConstrainRegClass;
+ }
BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R)
- .addReg(InitReg.hasValue() ? *InitReg : undef(RC))
+ .addReg(InitReg ? *InitReg : undef(RC))
.addMBB(PreheaderBB)
.addReg(LoopReg)
.addMBB(BB);
- if (!InitReg.hasValue())
+ if (!InitReg)
UndefPhis[LoopReg] = R;
else
Phis[{LoopReg, *InitReg}] = R;
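The hasValue()/getValue() rewrites scattered through this file track llvm::Optional's convergence on the std::optional interface; the terse forms are exact synonyms for the named members. The same migration expressed in standard C++ (illustrative only):

#include <optional>

int demo(std::optional<int> V) {
  if (!V)      // was: !V.hasValue()
    return 0;
  return *V;   // was: V.getValue()
}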
@@ -1793,10 +1817,10 @@ void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
// Iterate in reverse order over all instructions, remapping as we go.
for (MachineBasicBlock *B : reverse(Blocks)) {
- for (auto I = B->getFirstInstrTerminator()->getReverseIterator();
+ for (auto I = B->instr_rbegin();
I != std::next(B->getFirstNonPHI()->getReverseIterator());) {
- MachineInstr *MI = &*I++;
- rewriteUsesOf(MI);
+ MachineBasicBlock::reverse_instr_iterator MI = I++;
+ rewriteUsesOf(&*MI);
}
}
for (auto *MI : IllegalPhisToDelete) {
@@ -1919,7 +1943,7 @@ void PeelingModuloScheduleExpander::fixupBranches() {
TII->removeBranch(*Prolog);
Optional<bool> StaticallyGreater =
LoopInfo->createTripCountGreaterCondition(TC, *Prolog, Cond);
- if (!StaticallyGreater.hasValue()) {
+ if (!StaticallyGreater) {
LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");
// Dynamically branch based on Cond.
TII->insertBranch(*Prolog, Epilog, Fallthrough, Cond, DebugLoc());
@@ -1929,8 +1953,8 @@ void PeelingModuloScheduleExpander::fixupBranches() {
// blocks. Leave it to unreachable-block-elim to clean up.
Prolog->removeSuccessor(Fallthrough);
for (MachineInstr &P : Fallthrough->phis()) {
- P.RemoveOperand(2);
- P.RemoveOperand(1);
+ P.removeOperand(2);
+ P.removeOperand(1);
}
TII->insertUnconditionalBranch(*Prolog, Epilog, DebugLoc());
KernelDisposed = true;
@@ -1939,8 +1963,8 @@ void PeelingModuloScheduleExpander::fixupBranches() {
// Prolog always falls through; remove incoming values in epilog.
Prolog->removeSuccessor(Epilog);
for (MachineInstr &P : Epilog->phis()) {
- P.RemoveOperand(4);
- P.RemoveOperand(3);
+ P.removeOperand(4);
+ P.removeOperand(3);
}
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp b/contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp
index db5217469fba..7304bfef55cb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp
@@ -25,7 +25,7 @@ DwarfStringPoolEntryRef NonRelocatableStringpool::getEntry(StringRef S) {
Entry.Symbol = nullptr;
CurrentEndOffset += S.size() + 1;
}
- return DwarfStringPoolEntryRef(*I.first, true);
+ return DwarfStringPoolEntryRef(*I.first);
}
StringRef NonRelocatableStringpool::internString(StringRef S) {
@@ -44,7 +44,7 @@ NonRelocatableStringpool::getEntriesForEmission() const {
Result.reserve(Strings.size());
for (const auto &E : Strings)
if (E.getValue().isIndexed())
- Result.emplace_back(E, true);
+ Result.emplace_back(E);
llvm::sort(Result, [](const DwarfStringPoolEntryRef A,
const DwarfStringPoolEntryRef B) {
return A.getIndex() < B.getIndex();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/OptimizePHIs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/OptimizePHIs.cpp
index 8a6cf47c0d89..d5d262e4047a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/OptimizePHIs.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/OptimizePHIs.cpp
@@ -19,7 +19,6 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp
index 7693ab417de9..7709095cd683 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp
@@ -31,9 +31,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Pass.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp
index 3e32afaafa6e..43b23368ead2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp
@@ -16,8 +16,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
-#include "llvm/Support/ErrorOr.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MemoryBufferRef.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/SplitModule.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp
index ca44b7a53982..0f9da0637ced 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp
@@ -14,11 +14,11 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index f9b16d2630d6..31e37c4cd7e3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -90,7 +90,6 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
@@ -214,8 +213,9 @@ namespace {
const SmallSet<Register, 2> &TargetReg,
RecurrenceCycle &RC);
- /// If copy instruction \p MI is a virtual register copy, track it in
- /// the set \p CopyMIs. If this virtual register was previously seen as a
+ /// If copy instruction \p MI is a virtual register copy or a copy of a
+ /// constant physical register to a virtual register, track it in the
+ /// set \p CopyMIs. If this virtual register was previously seen as a
/// copy, replace the uses of this copy with the previously seen copy's
/// destination register.
bool foldRedundantCopy(MachineInstr &MI,
@@ -810,7 +810,7 @@ protected:
unsigned CurrentSrcIdx = 0; ///< The index of the source being rewritten.
public:
Rewriter(MachineInstr &CopyLike) : CopyLike(CopyLike) {}
- virtual ~Rewriter() {}
+ virtual ~Rewriter() = default;
/// Get the next rewritable source (SrcReg, SrcSubReg) and
/// the related value that it affects (DstReg, DstSubReg).
@@ -1022,7 +1022,7 @@ public:
CurrentSrcIdx = -1;
// Rewrite the operation as a COPY.
// Get rid of the sub-register index.
- CopyLike.RemoveOperand(2);
+ CopyLike.removeOperand(2);
// Morph the operation into a COPY.
CopyLike.setDesc(TII.get(TargetOpcode::COPY));
return true;
@@ -1412,7 +1412,7 @@ bool PeepholeOptimizer::foldRedundantCopy(
Register SrcReg = MI.getOperand(1).getReg();
unsigned SrcSubReg = MI.getOperand(1).getSubReg();
- if (!SrcReg.isVirtual())
+ if (!SrcReg.isVirtual() && !MRI->isConstantPhysReg(SrcReg))
return false;
Register DstReg = MI.getOperand(0).getReg();
@@ -1643,8 +1643,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
// without any intervening re-definition of $physreg.
DenseMap<Register, MachineInstr *> NAPhysToVirtMIs;
- // Set of pairs of virtual registers and their subregs that are copied
- // from.
+ // Set of copies to virtual registers keyed by source register. Never
+ // holds any physreg which requires def tracking.
DenseMap<RegSubRegPair, MachineInstr *> CopySrcMIs;
bool IsLoopHeader = MLI->isLoopHeader(&MBB);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
index 82ed386db827..97b1532300b1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
@@ -28,14 +28,11 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Pass.h"
using namespace llvm;
#define DEBUG_TYPE "post-RA-hazard-rec"
@@ -72,10 +69,11 @@ bool PostRAHazardRecognizer::runOnMachineFunction(MachineFunction &Fn) {
TII->CreateTargetPostRAHazardRecognizer(Fn));
// Return if the target has not implemented a hazard recognizer.
- if (!HazardRec.get())
+ if (!HazardRec)
return false;
// Loop over all of the basic blocks
+ bool Changed = false;
for (auto &MBB : Fn) {
// We do not call HazardRec->reset() here to make sure we are handling noop
// hazards at the start of basic blocks.
@@ -85,6 +83,8 @@ bool PostRAHazardRecognizer::runOnMachineFunction(MachineFunction &Fn) {
HazardRec->EmitNoops(NumPreNoops);
TII->insertNoops(MBB, MachineBasicBlock::iterator(MI), NumPreNoops);
NumNoops += NumPreNoops;
+ if (NumPreNoops)
+ Changed = true;
HazardRec->EmitInstruction(&MI);
if (HazardRec->atIssueLimit()) {
@@ -92,5 +92,5 @@ bool PostRAHazardRecognizer::runOnMachineFunction(MachineFunction &Fn) {
}
}
}
- return true;
+ return Changed;
}
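Returning Changed instead of an unconditional true matters to the legacy pass manager: the return value is its only signal that the function was modified, and over-reporting needlessly invalidates preserved analyses on functions where no noop was ever inserted. The general shape of the contract, as a sketch with hypothetical helper names:

bool runOnMachineFunctionSketch(MachineFunction &MF) {
  bool Changed = false;
  for (MachineBasicBlock &MBB : MF)
    for (MachineInstr &MI : MBB)
      if (needsNoopsBefore(MI)) { // hypothetical predicate
        insertNoopsBefore(MI);    // hypothetical mutation
        Changed = true;
      }
  return Changed; // true only if we actually touched the function
}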
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp
index aac46cb22084..98fc7e07a1b4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -25,18 +25,16 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -72,7 +70,7 @@ DebugMod("postra-sched-debugmod",
cl::desc("Debug control MBBs that are scheduled"),
cl::init(0), cl::Hidden);
-AntiDepBreaker::~AntiDepBreaker() { }
+AntiDepBreaker::~AntiDepBreaker() = default;
namespace {
class PostRAScheduler : public MachineFunctionPass {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 74b903f99284..1115c2a27956 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -18,10 +18,8 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
-#include "llvm/IR/User.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index d232ca3a69c3..7327f9e52efc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -11,10 +11,11 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -45,6 +46,11 @@ public:
void getAnalysisUsage(AnalysisUsage &au) const override;
bool runOnMachineFunction(MachineFunction &MF) override;
+
+ virtual MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::IsSSA);
+ }
};
} // end anonymous namespace
@@ -124,7 +130,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
// Using instr wasn't found; it could be in another block.
// Leave the physreg IMPLICIT_DEF, but trim any extra operands.
for (unsigned i = MI->getNumOperands() - 1; i; --i)
- MI->RemoveOperand(i);
+ MI->removeOperand(i);
LLVM_DEBUG(dbgs() << "Keeping physreg: " << *MI);
}
@@ -140,7 +146,6 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getSubtarget().getInstrInfo();
TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
- assert(MRI->isSSA() && "ProcessImplicitDefs only works on SSA form.");
assert(WorkList.empty() && "Inconsistent worklist state");
for (MachineBasicBlock &MBB : MF) {
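
Rather than asserting SSA form inside runOnMachineFunction, the ProcessImplicitDefs hunks above declare IsSSA as a required MachineFunctionProperty, letting the pass manager verify the precondition before the pass runs. A hedged sketch of that declaration pattern (pass name hypothetical):

```cpp
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"

namespace {
class RequiresSSASketch : public llvm::MachineFunctionPass {
public:
  static char ID;
  RequiresSSASketch() : llvm::MachineFunctionPass(ID) {}

  // The pass manager checks this before the pass runs, replacing an
  // in-pass assert(MRI->isSSA() && ...).
  llvm::MachineFunctionProperties getRequiredProperties() const override {
    return llvm::MachineFunctionProperties().set(
        llvm::MachineFunctionProperties::Property::IsSSA);
  }

  bool runOnMachineFunction(llvm::MachineFunction &) override {
    return false; // this sketch changes nothing
  }
};
} // namespace

char RequiresSSASketch::ID = 0;
```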
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 8d8a6126dad0..1a0f296d5fdc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -55,10 +55,8 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -130,6 +128,7 @@ private:
void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
int &SPAdj);
void insertPrologEpilogCode(MachineFunction &MF);
+ void insertZeroCallUsedRegs(MachineFunction &MF);
};
} // end anonymous namespace
@@ -284,6 +283,9 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
assert(!Failed && "Invalid warn-stack-size fn attr value");
(void)Failed;
}
+ if (MF.getFunction().hasFnAttribute(Attribute::SafeStack)) {
+ StackSize += MFI.getUnsafeStackSize();
+ }
if (StackSize > Threshold) {
DiagnosticInfoStackSize DiagStackSize(F, StackSize, Threshold, DS_Warning);
F.getContext().diagnose(DiagStackSize);
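
With the hunk above, functions built with SafeStack now have their unsafe-stack bytes counted toward the warn-stack-size threshold. A sketch of source that should trip the diagnostic under the new accounting (the flags are Clang's; the exact threshold is illustrative):

```cpp
// Build sketch: clang++ -fsanitize=safe-stack -Wframe-larger-than=128 -c frame.cpp
// -Wframe-larger-than=<N> sets the "warn-stack-size" function attribute that
// the code above compares against; SafeStack moves Buf to the unsafe stack,
// whose size is now included via MFI.getUnsafeStackSize().
extern void consume(char *);

void bigFrame() {
  char Buf[4096]; // lives on the unsafe stack under SafeStack
  consume(Buf);   // escape the buffer so it is not optimized away
}
```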
@@ -837,8 +839,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
// Adjust 'Offset' to point to the end of last fixed sized preallocated
// object.
for (int i = MFI.getObjectIndexBegin(); i != 0; ++i) {
- if (MFI.getStackID(i) !=
- TargetStackID::Default) // Only allocate objects on the default stack.
+ // Only allocate objects on the default stack.
+ if (MFI.getStackID(i) != TargetStackID::Default)
continue;
int64_t FixedOff;
@@ -855,47 +857,34 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
if (FixedOff > Offset) Offset = FixedOff;
}
+ Align MaxAlign = MFI.getMaxAlign();
// First assign frame offsets to stack objects that are used to spill
// callee saved registers.
- if (StackGrowsDown && MaxCSFrameIndex >= MinCSFrameIndex) {
- for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
- if (MFI.getStackID(i) !=
- TargetStackID::Default) // Only allocate objects on the default stack.
- continue;
+ if (MaxCSFrameIndex >= MinCSFrameIndex) {
+ for (unsigned i = 0; i <= MaxCSFrameIndex - MinCSFrameIndex; ++i) {
+ unsigned FrameIndex =
+ StackGrowsDown ? MinCSFrameIndex + i : MaxCSFrameIndex - i;
- // If the stack grows down, we need to add the size to find the lowest
- // address of the object.
- Offset += MFI.getObjectSize(i);
-
- // Adjust to alignment boundary
- Offset = alignTo(Offset, MFI.getObjectAlign(i), Skew);
-
- LLVM_DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << -Offset << "]\n");
- MFI.setObjectOffset(i, -Offset); // Set the computed offset
- }
- } else if (MaxCSFrameIndex >= MinCSFrameIndex) {
- // Be careful about underflow in comparisons against MinCSFrameIndex.
- for (unsigned i = MaxCSFrameIndex; i != MinCSFrameIndex - 1; --i) {
- if (MFI.getStackID(i) !=
- TargetStackID::Default) // Only allocate objects on the default stack.
+ // Only allocate objects on the default stack.
+ if (MFI.getStackID(FrameIndex) != TargetStackID::Default)
continue;
- if (MFI.isDeadObjectIndex(i))
+ // TODO: should this just be if (MFI.isDeadObjectIndex(FrameIndex))
+ if (!StackGrowsDown && MFI.isDeadObjectIndex(FrameIndex))
continue;
- // Adjust to alignment boundary
- Offset = alignTo(Offset, MFI.getObjectAlign(i), Skew);
-
- LLVM_DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << Offset << "]\n");
- MFI.setObjectOffset(i, Offset);
- Offset += MFI.getObjectSize(i);
+ AdjustStackOffset(MFI, FrameIndex, StackGrowsDown, Offset, MaxAlign,
+ Skew);
}
}
+ assert(MaxAlign == MFI.getMaxAlign() &&
+ "MFI.getMaxAlign should already account for all callee-saved "
+ "registers without a fixed stack slot");
+
// FixedCSEnd is the stack offset to the end of the fixed and callee-save
// stack area.
int64_t FixedCSEnd = Offset;
- Align MaxAlign = MFI.getMaxAlign();
// Make sure the special register scavenging spill slot is closest to the
// incoming stack pointer if a frame pointer is required and is closer
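
The rewritten loop above replaces two mirrored loops (one per stack growth direction) with a single pass whose visiting order depends on StackGrowsDown, sidestepping the unsigned-underflow hazard the deleted comment warned about. A standalone sketch of the index arithmetic, using plain C++ in place of the LLVM types:

```cpp
#include <cstdio>

// Visit frame indices [MinCS, MaxCS] front-to-back when the stack grows
// down, back-to-front otherwise, without underflowing when MinCS == 0.
void visitCalleeSavedSlots(unsigned MinCS, unsigned MaxCS, bool StackGrowsDown) {
  if (MaxCS < MinCS)
    return; // no callee-saved slots to place
  for (unsigned i = 0; i <= MaxCS - MinCS; ++i) {
    unsigned FrameIndex = StackGrowsDown ? MinCS + i : MaxCS - i;
    std::printf("allocate FI(%u)\n", FrameIndex);
  }
}
```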
@@ -982,8 +971,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
continue;
if (StackProtectorFI == (int)i || EHRegNodeFrameIndex == (int)i)
continue;
- if (MFI.getStackID(i) !=
- TargetStackID::Default) // Only allocate objects on the default stack.
+ // Only allocate objects on the default stack.
+ if (MFI.getStackID(i) != TargetStackID::Default)
continue;
switch (MFI.getObjectSSPLayout(i)) {
@@ -1036,8 +1025,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
continue;
if (ProtectedObjs.count(i))
continue;
- if (MFI.getStackID(i) !=
- TargetStackID::Default) // Only allocate objects on the default stack.
+ // Only allocate objects on the default stack.
+ if (MFI.getStackID(i) != TargetStackID::Default)
continue;
// Add the objects that we need to allocate to our working set.
@@ -1145,6 +1134,9 @@ void PEI::insertPrologEpilogCode(MachineFunction &MF) {
for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
TFI.emitEpilogue(MF, *RestoreBlock);
+ // Zero call used registers before restoring callee-saved registers.
+ insertZeroCallUsedRegs(MF);
+
for (MachineBasicBlock *SaveBlock : SaveBlocks)
TFI.inlineStackProbe(MF, *SaveBlock);
@@ -1155,11 +1147,7 @@ void PEI::insertPrologEpilogCode(MachineFunction &MF) {
if (MF.shouldSplitStack()) {
for (MachineBasicBlock *SaveBlock : SaveBlocks)
TFI.adjustForSegmentedStacks(MF, *SaveBlock);
- // Record that there are split-stack functions, so we will emit a
- // special section to tell the linker.
- MF.getMMI().setHasSplitStack(true);
- } else
- MF.getMMI().setHasNosplitStack(true);
+ }
// Emit additional code that is required to explicitly handle the stack in
// HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The
@@ -1171,6 +1159,120 @@ void PEI::insertPrologEpilogCode(MachineFunction &MF) {
TFI.adjustForHiPEPrologue(MF, *SaveBlock);
}
+/// insertZeroCallUsedRegs - Zero out call used registers.
+void PEI::insertZeroCallUsedRegs(MachineFunction &MF) {
+ const Function &F = MF.getFunction();
+
+ if (!F.hasFnAttribute("zero-call-used-regs"))
+ return;
+
+ using namespace ZeroCallUsedRegs;
+
+ ZeroCallUsedRegsKind ZeroRegsKind =
+ StringSwitch<ZeroCallUsedRegsKind>(
+ F.getFnAttribute("zero-call-used-regs").getValueAsString())
+ .Case("skip", ZeroCallUsedRegsKind::Skip)
+ .Case("used-gpr-arg", ZeroCallUsedRegsKind::UsedGPRArg)
+ .Case("used-gpr", ZeroCallUsedRegsKind::UsedGPR)
+ .Case("used-arg", ZeroCallUsedRegsKind::UsedArg)
+ .Case("used", ZeroCallUsedRegsKind::Used)
+ .Case("all-gpr-arg", ZeroCallUsedRegsKind::AllGPRArg)
+ .Case("all-gpr", ZeroCallUsedRegsKind::AllGPR)
+ .Case("all-arg", ZeroCallUsedRegsKind::AllArg)
+ .Case("all", ZeroCallUsedRegsKind::All);
+
+ if (ZeroRegsKind == ZeroCallUsedRegsKind::Skip)
+ return;
+
+ const bool OnlyGPR = static_cast<unsigned>(ZeroRegsKind) & ONLY_GPR;
+ const bool OnlyUsed = static_cast<unsigned>(ZeroRegsKind) & ONLY_USED;
+ const bool OnlyArg = static_cast<unsigned>(ZeroRegsKind) & ONLY_ARG;
+
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ const BitVector AllocatableSet(TRI.getAllocatableSet(MF));
+
+ // Mark all used registers.
+ BitVector UsedRegs(TRI.getNumRegs());
+ if (OnlyUsed)
+ for (const MachineBasicBlock &MBB : MF)
+ for (const MachineInstr &MI : MBB)
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+
+ MCRegister Reg = MO.getReg();
+ if (AllocatableSet[Reg] && !MO.isImplicit() &&
+ (MO.isDef() || MO.isUse()))
+ UsedRegs.set(Reg);
+ }
+
+ BitVector RegsToZero(TRI.getNumRegs());
+ for (MCRegister Reg : AllocatableSet.set_bits()) {
+ // Skip over fixed registers.
+ if (TRI.isFixedRegister(MF, Reg))
+ continue;
+
+ // Want only general purpose registers.
+ if (OnlyGPR && !TRI.isGeneralPurposeRegister(MF, Reg))
+ continue;
+
+ // Want only used registers.
+ if (OnlyUsed && !UsedRegs[Reg])
+ continue;
+
+ // Want only registers used for arguments.
+ if (OnlyArg && !TRI.isArgumentRegister(MF, Reg))
+ continue;
+
+ RegsToZero.set(Reg);
+ }
+
+ // Don't clear registers that are live when leaving the function.
+ for (const MachineBasicBlock &MBB : MF)
+ for (const MachineInstr &MI : MBB.terminators()) {
+ if (!MI.isReturn())
+ continue;
+
+ for (const auto &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+
+ for (MCPhysReg SReg : TRI.sub_and_superregs_inclusive(MO.getReg()))
+ RegsToZero.reset(SReg);
+ }
+ }
+
+ // Don't need to clear registers that are used/clobbered by terminating
+ // instructions.
+ for (const MachineBasicBlock &MBB : MF) {
+ if (!MBB.isReturnBlock())
+ continue;
+
+ MachineBasicBlock::const_iterator MBBI = MBB.getFirstTerminator();
+ for (MachineBasicBlock::const_iterator I = MBBI, E = MBB.end(); I != E;
+ ++I) {
+ for (const MachineOperand &MO : I->operands()) {
+ if (!MO.isReg())
+ continue;
+
+ for (const MCPhysReg &Reg :
+ TRI.sub_and_superregs_inclusive(MO.getReg()))
+ RegsToZero.reset(Reg);
+ }
+ }
+ }
+
+ // Don't clear registers that are reset before exiting.
+ for (const CalleeSavedInfo &CSI : MF.getFrameInfo().getCalleeSavedInfo())
+ for (MCRegister Reg : TRI.sub_and_superregs_inclusive(CSI.getReg()))
+ RegsToZero.reset(Reg);
+
+ const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
+ for (MachineBasicBlock &MBB : MF)
+ if (MBB.isReturnBlock())
+ TFI.emitZeroCallUsedRegs(RegsToZero, MBB);
+}
+
/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
/// register references and actual offsets.
void PEI::replaceFrameIndices(MachineFunction &MF) {
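
The new insertZeroCallUsedRegs() is the backend half of the zero-call-used-regs hardening: return blocks get target-emitted zeroing for whatever registers survive in RegsToZero. A usage sketch from the source side; the attribute spelling is Clang/GCC's, and which registers are actually cleared depends on the target's emitZeroCallUsedRegs():

```cpp
// The string values mirror the StringSwitch cases above
// ("skip", "used-gpr-arg", "used-gpr", "used-arg", "used",
//  "all-gpr-arg", "all-gpr", "all-arg", "all").
__attribute__((zero_call_used_regs("used-gpr-arg")))
int scrubbed_add(int a, int b) {
  return a + b; // used GPR argument registers are zeroed before returning
}
```

A translation-unit-wide default can be requested with Clang's -fzero-call-used-regs= driver flag, which lowers to the same "zero-call-used-regs" function attribute this pass reads.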
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PseudoProbeInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PseudoProbeInserter.cpp
index 5f69f9194125..86ea3ec67178 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PseudoProbeInserter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PseudoProbeInserter.cpp
@@ -18,11 +18,9 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/PseudoProbe.h"
#include "llvm/InitializePasses.h"
-#include "llvm/MC/MCPseudoProbe.h"
-#include "llvm/Target/TargetMachine.h"
-#include <unordered_set>
#define DEBUG_TYPE "pseudo-probe-inserter"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PseudoSourceValue.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PseudoSourceValue.cpp
index 74e721dbd138..40c52b9d9707 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PseudoSourceValue.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PseudoSourceValue.cpp
@@ -11,26 +11,23 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
using namespace llvm;
static const char *const PSVNames[] = {
"Stack", "GOT", "JumpTable", "ConstantPool", "FixedStack",
"GlobalValueCallEntry", "ExternalSymbolCallEntry"};
-PseudoSourceValue::PseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII)
+PseudoSourceValue::PseudoSourceValue(unsigned Kind, const TargetMachine &TM)
: Kind(Kind) {
- AddressSpace = TII.getAddressSpaceForPseudoSourceKind(Kind);
+ AddressSpace = TM.getAddressSpaceForPseudoSourceKind(Kind);
}
-
-PseudoSourceValue::~PseudoSourceValue() {}
+PseudoSourceValue::~PseudoSourceValue() = default;
void PseudoSourceValue::printCustom(raw_ostream &O) const {
if (Kind < TargetCustom)
@@ -79,9 +76,9 @@ void FixedStackPseudoSourceValue::printCustom(raw_ostream &OS) const {
OS << "FixedStack" << FI;
}
-CallEntryPseudoSourceValue::CallEntryPseudoSourceValue(
- unsigned Kind, const TargetInstrInfo &TII)
- : PseudoSourceValue(Kind, TII) {}
+CallEntryPseudoSourceValue::CallEntryPseudoSourceValue(unsigned Kind,
+ const TargetMachine &TM)
+ : PseudoSourceValue(Kind, TM) {}
bool CallEntryPseudoSourceValue::isConstant(const MachineFrameInfo *) const {
return false;
@@ -96,20 +93,17 @@ bool CallEntryPseudoSourceValue::mayAlias(const MachineFrameInfo *) const {
}
GlobalValuePseudoSourceValue::GlobalValuePseudoSourceValue(
- const GlobalValue *GV,
- const TargetInstrInfo &TII)
- : CallEntryPseudoSourceValue(GlobalValueCallEntry, TII), GV(GV) {}
+ const GlobalValue *GV, const TargetMachine &TM)
+ : CallEntryPseudoSourceValue(GlobalValueCallEntry, TM), GV(GV) {}
ExternalSymbolPseudoSourceValue::ExternalSymbolPseudoSourceValue(
- const char *ES, const TargetInstrInfo &TII)
- : CallEntryPseudoSourceValue(ExternalSymbolCallEntry, TII), ES(ES) {}
+ const char *ES, const TargetMachine &TM)
+ : CallEntryPseudoSourceValue(ExternalSymbolCallEntry, TM), ES(ES) {}
-PseudoSourceValueManager::PseudoSourceValueManager(
- const TargetInstrInfo &TIInfo)
- : TII(TIInfo),
- StackPSV(PseudoSourceValue::Stack, TII),
- GOTPSV(PseudoSourceValue::GOT, TII),
- JumpTablePSV(PseudoSourceValue::JumpTable, TII),
- ConstantPoolPSV(PseudoSourceValue::ConstantPool, TII) {}
+PseudoSourceValueManager::PseudoSourceValueManager(const TargetMachine &TMInfo)
+ : TM(TMInfo), StackPSV(PseudoSourceValue::Stack, TM),
+ GOTPSV(PseudoSourceValue::GOT, TM),
+ JumpTablePSV(PseudoSourceValue::JumpTable, TM),
+ ConstantPoolPSV(PseudoSourceValue::ConstantPool, TM) {}
const PseudoSourceValue *PseudoSourceValueManager::getStack() {
return &StackPSV;
@@ -129,7 +123,7 @@ const PseudoSourceValue *
PseudoSourceValueManager::getFixedStack(int FI) {
std::unique_ptr<FixedStackPseudoSourceValue> &V = FSValues[FI];
if (!V)
- V = std::make_unique<FixedStackPseudoSourceValue>(FI, TII);
+ V = std::make_unique<FixedStackPseudoSourceValue>(FI, TM);
return V.get();
}
@@ -138,7 +132,7 @@ PseudoSourceValueManager::getGlobalValueCallEntry(const GlobalValue *GV) {
std::unique_ptr<const GlobalValuePseudoSourceValue> &E =
GlobalCallEntries[GV];
if (!E)
- E = std::make_unique<GlobalValuePseudoSourceValue>(GV, TII);
+ E = std::make_unique<GlobalValuePseudoSourceValue>(GV, TM);
return E.get();
}
@@ -147,6 +141,6 @@ PseudoSourceValueManager::getExternalSymbolCallEntry(const char *ES) {
std::unique_ptr<const ExternalSymbolPseudoSourceValue> &E =
ExternalCallEntries[ES];
if (!E)
- E = std::make_unique<ExternalSymbolPseudoSourceValue>(ES, TII);
+ E = std::make_unique<ExternalSymbolPseudoSourceValue>(ES, TM);
return E.get();
}
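
The PseudoSourceValue diff above narrows the constructor dependency from TargetInstrInfo to TargetMachine, since getAddressSpaceForPseudoSourceKind() is the only hook queried and it now lives on the TargetMachine. A minimal sketch of that dependency-narrowing pattern with stand-in types (nothing here is LLVM API):

```cpp
// Stand-in for the one hook the class actually needs from its dependency.
struct TargetMachineLike {
  unsigned getAddressSpaceForPseudoSourceKind(unsigned /*Kind*/) const {
    return 0; // hypothetical default address space
  }
};

class PseudoSourceValueSketch {
  unsigned Kind;
  unsigned AddressSpace;

public:
  // Thread the coarser dependency through the constructor, as in the diff.
  PseudoSourceValueSketch(unsigned Kind, const TargetMachineLike &TM)
      : Kind(Kind),
        AddressSpace(TM.getAddressSpaceForPseudoSourceKind(Kind)) {}

  unsigned getAddressSpace() const { return AddressSpace; }
  unsigned getKind() const { return Kind; }
};
```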
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp
index 882f8e91bf1d..ec383b9b1c65 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp
@@ -8,6 +8,7 @@
//
// Target-independent, SSA-based data flow graph for register data flow (RDF).
//
+#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -18,7 +19,6 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -27,8 +27,6 @@
#include "llvm/IR/Function.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -979,18 +977,6 @@ RegisterRef DataFlowGraph::makeRegRef(const MachineOperand &Op) const {
return RegisterRef(PRI.getRegMaskId(Op.getRegMask()), LaneBitmask::getAll());
}
-RegisterRef DataFlowGraph::restrictRef(RegisterRef AR, RegisterRef BR) const {
- if (AR.Reg == BR.Reg) {
- LaneBitmask M = AR.Mask & BR.Mask;
- return M.any() ? RegisterRef(AR.Reg, M) : RegisterRef();
- }
- // This isn't strictly correct, because the overlap may happen in the
- // part masked out.
- if (PRI.alias(AR, BR))
- return AR;
- return RegisterRef();
-}
-
// For each stack in the map DefM, push the delimiter for block B on it.
void DataFlowGraph::markBlock(NodeId B, DefStackMap &DefM) {
// Push block delimiters.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp
index d704cf7b3213..2fd947086b4d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp
@@ -22,6 +22,7 @@
// and Embedded Architectures and Compilers", 8 (4),
// <10.1145/2086696.2086706>. <hal-00647369>
//
+#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
@@ -32,14 +33,12 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -341,9 +340,8 @@ Liveness::getAllReachingDefsRecImpl(RegisterRef RefRR, NodeAddr<RefNode*> RefA,
if (!(DA.Addr->getFlags() & NodeAttrs::PhiRef))
continue;
NodeAddr<PhiNode*> PA = DA.Addr->getOwner(DFG);
- if (Visited.count(PA.Id))
+ if (!Visited.insert(PA.Id).second)
continue;
- Visited.insert(PA.Id);
// Go over all phi uses and get the reaching defs for each use.
for (auto U : PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG)) {
const auto &T = getAllReachingDefsRecImpl(RefRR, U, Visited, TmpDefs,
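
The RDFLiveness hunk above folds a count()-then-insert() pair into one insert(), using the returned pair's .second to detect revisits, so each node costs one hash lookup instead of two. A generic sketch of the idiom (std::set stands in for the graph's set type):

```cpp
#include <set>

// insert() reports via .second whether the element was newly inserted,
// making a preceding count()/find() redundant.
bool visitOnce(std::set<unsigned> &Visited, unsigned Id) {
  if (!Visited.insert(Id).second)
    return false; // already seen: skip
  // ... process Id exactly once ...
  return true;
}
```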
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
index 1264e6021b6e..69db8bad54f9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -34,12 +34,7 @@ static bool isValidRegUseOf(const MachineOperand &MO, MCRegister PhysReg,
const TargetRegisterInfo *TRI) {
if (!isValidRegUse(MO))
return false;
- if (MO.getReg() == PhysReg)
- return true;
- for (MCRegAliasIterator R(PhysReg, TRI, false); R.isValid(); ++R)
- if (MO.getReg() == *R)
- return true;
- return false;
+ return TRI->regsOverlap(MO.getReg(), PhysReg);
}
static bool isValidRegDef(const MachineOperand &MO) {
@@ -50,12 +45,7 @@ static bool isValidRegDefOf(const MachineOperand &MO, MCRegister PhysReg,
const TargetRegisterInfo *TRI) {
if (!isValidRegDef(MO))
return false;
- if (MO.getReg() == PhysReg)
- return true;
- for (MCRegAliasIterator R(PhysReg, TRI, false); R.isValid(); ++R)
- if (MO.getReg() == *R)
- return true;
- return false;
+ return TRI->regsOverlap(MO.getReg(), PhysReg);
}
void ReachingDefAnalysis::enterBasicBlock(MachineBasicBlock *MBB) {
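
Both helpers above now delegate alias checking to TargetRegisterInfo::regsOverlap(), which subsumes the equality test plus the hand-rolled MCRegAliasIterator walk. A sketch of what the deleted loops computed, over an explicit and purely hypothetical alias table:

```cpp
#include <algorithm>
#include <vector>

// regsOverlap(A, B) is morally "A == B, or B appears among A's aliases";
// Aliases[R] lists the registers overlapping R (illustrative data layout).
bool regsOverlapSketch(unsigned A, unsigned B,
                       const std::vector<std::vector<unsigned>> &Aliases) {
  if (A == B)
    return true;
  const std::vector<unsigned> &AliasesOfA = Aliases[A];
  return std::find(AliasesOfA.begin(), AliasesOfA.end(), B) !=
         AliasesOfA.end();
}
```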
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp
index d891d4c2ffbb..0c18814189eb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -85,7 +85,7 @@ void RegAllocBase::allocatePhysRegs() {
seedLiveRegs();
// Continue assigning vregs one at a time to available physical registers.
- while (LiveInterval *VirtReg = dequeue()) {
+ while (const LiveInterval *VirtReg = dequeue()) {
assert(!VRM->hasPhys(VirtReg->reg()) && "Register already assigned");
// Unused registers can appear when the spiller coalesces snippets.
@@ -140,10 +140,7 @@ void RegAllocBase::allocatePhysRegs() {
// Keep going after reporting the error.
VRM->assignVirt2Phys(VirtReg->reg(), AllocOrder.front());
- continue;
- }
-
- if (AvailablePhysReg)
+ } else if (AvailablePhysReg)
Matrix->assign(*VirtReg, AvailablePhysReg);
for (Register Reg : SplitVRegs) {
@@ -176,7 +173,7 @@ void RegAllocBase::postOptimization() {
DeadRemats.clear();
}
-void RegAllocBase::enqueue(LiveInterval *LI) {
+void RegAllocBase::enqueue(const LiveInterval *LI) {
const Register Reg = LI->reg();
assert(Reg.isVirtual() && "Can only enqueue virtual registers");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h
index 1fb56dbaebb7..a8bf305a50c9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h
@@ -96,19 +96,19 @@ protected:
virtual Spiller &spiller() = 0;
/// enqueue - Add VirtReg to the priority queue of unassigned registers.
- virtual void enqueueImpl(LiveInterval *LI) = 0;
+ virtual void enqueueImpl(const LiveInterval *LI) = 0;
/// enqueue - Add VirtReg to the priority queue of unassigned registers.
- void enqueue(LiveInterval *LI);
+ void enqueue(const LiveInterval *LI);
/// dequeue - Return the next unassigned register, or NULL.
- virtual LiveInterval *dequeue() = 0;
+ virtual const LiveInterval *dequeue() = 0;
// A RegAlloc pass should override this to provide the allocation heuristics.
// Each call must guarantee forward progress by returning an available PhysReg
// or new set of split live virtual registers. It is up to the splitter to
// converge quickly toward fully spilled live ranges.
- virtual MCRegister selectOrSplit(LiveInterval &VirtReg,
+ virtual MCRegister selectOrSplit(const LiveInterval &VirtReg,
SmallVectorImpl<Register> &splitLVRs) = 0;
// Use this group name for NamedRegionTimer.
@@ -116,7 +116,7 @@ protected:
static const char TimerGroupDescription[];
/// Method called when the allocator is about to remove a LiveInterval.
- virtual void aboutToRemoveInterval(LiveInterval &LI) {}
+ virtual void aboutToRemoveInterval(const LiveInterval &LI) {}
public:
/// VerifyEnabled - True when -verify-regalloc is given.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp
index a9816b13e798..7defdf04aec8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -22,9 +22,7 @@
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/Spiller.h"
@@ -33,7 +31,6 @@
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include <cstdlib>
#include <queue>
using namespace llvm;
@@ -45,7 +42,7 @@ static RegisterRegAlloc basicRegAlloc("basic", "basic register allocator",
namespace {
struct CompSpillWeight {
- bool operator()(LiveInterval *A, LiveInterval *B) const {
+ bool operator()(const LiveInterval *A, const LiveInterval *B) const {
return A->weight() < B->weight();
}
};
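
RABasic keeps unassigned virtual registers in a max-heap ordered by spill weight; the change above only constifies the element type. A compilable sketch of a priority queue with such a comparator (IntervalLike is a stand-in for LiveInterval):

```cpp
#include <queue>
#include <vector>

struct IntervalLike {
  float Weight;
  float weight() const { return Weight; }
};

// operator() returns true when A orders before B; with std::priority_queue
// that puts the heaviest interval on top.
struct CompSpillWeightSketch {
  bool operator()(const IntervalLike *A, const IntervalLike *B) const {
    return A->weight() < B->weight();
  }
};

using SpillQueue =
    std::priority_queue<const IntervalLike *,
                        std::vector<const IntervalLike *>,
                        CompSpillWeightSketch>;

// Usage: SpillQueue Q; Q.push(&LI); const IntervalLike *Heaviest = Q.top();
```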
@@ -65,8 +62,9 @@ class RABasic : public MachineFunctionPass,
// state
std::unique_ptr<Spiller> SpillerInstance;
- std::priority_queue<LiveInterval*, std::vector<LiveInterval*>,
- CompSpillWeight> Queue;
+ std::priority_queue<const LiveInterval *, std::vector<const LiveInterval *>,
+ CompSpillWeight>
+ Queue;
// Scratch space. Allocated here to avoid repeated malloc calls in
// selectOrSplit().
@@ -88,19 +86,17 @@ public:
Spiller &spiller() override { return *SpillerInstance; }
- void enqueueImpl(LiveInterval *LI) override {
- Queue.push(LI);
- }
+ void enqueueImpl(const LiveInterval *LI) override { Queue.push(LI); }
- LiveInterval *dequeue() override {
+ const LiveInterval *dequeue() override {
if (Queue.empty())
return nullptr;
- LiveInterval *LI = Queue.top();
+ const LiveInterval *LI = Queue.top();
Queue.pop();
return LI;
}
- MCRegister selectOrSplit(LiveInterval &VirtReg,
+ MCRegister selectOrSplit(const LiveInterval &VirtReg,
SmallVectorImpl<Register> &SplitVRegs) override;
/// Perform register allocation.
@@ -119,7 +115,7 @@ public:
// Helper for spilling all live virtual registers currently unified under preg
// that interfere with the most recently queried lvr. Return true if spilling
// was successful, and append any new spilled/split intervals to splitLVRs.
- bool spillInterferences(LiveInterval &VirtReg, MCRegister PhysReg,
+ bool spillInterferences(const LiveInterval &VirtReg, MCRegister PhysReg,
SmallVectorImpl<Register> &SplitVRegs);
static char ID;
@@ -208,16 +204,17 @@ void RABasic::releaseMemory() {
// Spill or split all live virtual registers currently unified under PhysReg
// that interfere with VirtReg. The newly spilled or split live intervals are
// returned by appending them to SplitVRegs.
-bool RABasic::spillInterferences(LiveInterval &VirtReg, MCRegister PhysReg,
+bool RABasic::spillInterferences(const LiveInterval &VirtReg,
+ MCRegister PhysReg,
SmallVectorImpl<Register> &SplitVRegs) {
// Record each interference and determine if all are spillable before mutating
// either the union or live intervals.
- SmallVector<LiveInterval*, 8> Intfs;
+ SmallVector<const LiveInterval *, 8> Intfs;
// Collect interferences assigned to any alias of the physical register.
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
- for (auto *Intf : reverse(Q.interferingVRegs())) {
+ for (const auto *Intf : reverse(Q.interferingVRegs())) {
if (!Intf->isSpillable() || Intf->weight() > VirtReg.weight())
return false;
Intfs.push_back(Intf);
@@ -229,7 +226,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, MCRegister PhysReg,
// Spill each interfering vreg allocated to PhysReg or an alias.
for (unsigned i = 0, e = Intfs.size(); i != e; ++i) {
- LiveInterval &Spill = *Intfs[i];
+ const LiveInterval &Spill = *Intfs[i];
// Skip duplicates.
if (!VRM->hasPhys(Spill.reg()))
@@ -258,7 +255,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, MCRegister PhysReg,
// |vregs| * |machineregs|. And since the number of interference tests is
// minimal, there is no value in caching them outside the scope of
// selectOrSplit().
-MCRegister RABasic::selectOrSplit(LiveInterval &VirtReg,
+MCRegister RABasic::selectOrSplit(const LiveInterval &VirtReg,
SmallVectorImpl<Register> &SplitVRegs) {
// Populate a list of physical register spill candidates.
SmallVector<MCRegister, 8> PhysRegSpillCands;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
index fc5d1104a999..ee03feda796f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
@@ -11,13 +11,14 @@
//===----------------------------------------------------------------------===//
#include "RegAllocEvictionAdvisor.h"
+#include "AllocationOrder.h"
#include "RegAllocGreedy.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/PassRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
@@ -25,7 +26,7 @@
using namespace llvm;
static cl::opt<RegAllocEvictionAdvisorAnalysis::AdvisorMode> Mode(
- "regalloc-enable-advisor", cl::Hidden, cl::ZeroOrMore,
+ "regalloc-enable-advisor", cl::Hidden,
cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default),
cl::desc("Enable regalloc advisor mode"),
cl::values(
@@ -42,6 +43,14 @@ static cl::opt<bool> EnableLocalReassignment(
"may be compile time intensive"),
cl::init(false));
+cl::opt<unsigned> EvictInterferenceCutoff(
+ "regalloc-eviction-max-interference-cutoff", cl::Hidden,
+ cl::desc("Number of interferences after which we declare "
+ "an interference unevictable and bail out. This "
+ "is a compilation cost-saving consideration. To "
+ "disable, pass a very large number."),
+ cl::init(10));
+
#define DEBUG_TYPE "regalloc"
#ifdef LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL
#define LLVM_HAVE_TF_AOT
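
The hard-coded interference limit of 10 becomes a hidden cl::opt above, tunable via -regalloc-eviction-max-interference-cutoff=N without recompiling. A minimal sketch of declaring and reading such an option (the option name here is hypothetical):

```cpp
#include "llvm/Support/CommandLine.h"

// A hidden, tunable threshold; cl::init supplies the default that used to
// be a literal in the code.
static llvm::cl::opt<unsigned> InterferenceCutoffSketch(
    "sketch-eviction-cutoff", llvm::cl::Hidden,
    llvm::cl::desc("Bail out once this many interferences are seen."),
    llvm::cl::init(10));

bool tooManyInterferences(unsigned N) {
  // cl::opt converts implicitly to its underlying type.
  return N >= InterferenceCutoffSketch;
}
```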
@@ -66,7 +75,7 @@ public:
private:
std::unique_ptr<RegAllocEvictionAdvisor>
- getAdvisor(MachineFunction &MF, const RAGreedy &RA) override {
+ getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
return std::make_unique<DefaultEvictionAdvisor>(MF, RA);
}
bool doInitialization(Module &M) override {
@@ -113,7 +122,7 @@ StringRef RegAllocEvictionAdvisorAnalysis::getPassName() const {
llvm_unreachable("Unknown advisor kind");
}
-RegAllocEvictionAdvisor::RegAllocEvictionAdvisor(MachineFunction &MF,
+RegAllocEvictionAdvisor::RegAllocEvictionAdvisor(const MachineFunction &MF,
const RAGreedy &RA)
: MF(MF), RA(RA), Matrix(RA.getInterferenceMatrix()),
LIS(RA.getLiveIntervals()), VRM(RA.getVirtRegMap()),
@@ -136,8 +145,8 @@ RegAllocEvictionAdvisor::RegAllocEvictionAdvisor(MachineFunction &MF,
/// register.
/// @param B The live range to be evicted.
/// @param BreaksHint True when B is already assigned to its preferred register.
-bool DefaultEvictionAdvisor::shouldEvict(LiveInterval &A, bool IsHint,
- LiveInterval &B,
+bool DefaultEvictionAdvisor::shouldEvict(const LiveInterval &A, bool IsHint,
+ const LiveInterval &B,
bool BreaksHint) const {
bool CanSplit = RA.getExtraInfo().getStage(B) < RS_Spill;
@@ -156,7 +165,7 @@ bool DefaultEvictionAdvisor::shouldEvict(LiveInterval &A, bool IsHint,
/// canEvictHintInterference - return true if the interference for VirtReg
/// on the PhysReg, which is VirtReg's hint, can be evicted in favor of VirtReg.
bool DefaultEvictionAdvisor::canEvictHintInterference(
- LiveInterval &VirtReg, MCRegister PhysReg,
+ const LiveInterval &VirtReg, MCRegister PhysReg,
const SmallVirtRegSet &FixedRegisters) const {
EvictionCost MaxCost;
MaxCost.setBrokenHints(1);
@@ -174,7 +183,7 @@ bool DefaultEvictionAdvisor::canEvictHintInterference(
/// when returning true.
/// @returns True when interference can be evicted cheaper than MaxCost.
bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost(
- LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
+ const LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
EvictionCost &MaxCost, const SmallVirtRegSet &FixedRegisters) const {
// It is only possible to evict virtual register interference.
if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg)
@@ -195,12 +204,12 @@ bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost(
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
// If there are 10 or more interferences, chances are one is heavier.
- const auto &Interferences = Q.interferingVRegs(10);
- if (Interferences.size() >= 10)
+ const auto &Interferences = Q.interferingVRegs(EvictInterferenceCutoff);
+ if (Interferences.size() >= EvictInterferenceCutoff)
return false;
// Check if any interfering live range is heavier than MaxWeight.
- for (LiveInterval *Intf : reverse(Interferences)) {
+ for (const LiveInterval *Intf : reverse(Interferences)) {
assert(Register::isVirtualRegister(Intf->reg()) &&
"Only expecting virtual register interference from query");
@@ -227,7 +236,10 @@ bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost(
MRI->getRegClass(Intf->reg())));
// Only evict older cascades or live ranges without a cascade.
unsigned IntfCascade = RA.getExtraInfo().getCascade(Intf->reg());
- if (Cascade <= IntfCascade) {
+ if (Cascade == IntfCascade)
+ return false;
+
+ if (Cascade < IntfCascade) {
if (!Urgent)
return false;
// We permit breaking cascades for urgent evictions. It should be the
@@ -261,7 +273,7 @@ bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost(
}
MCRegister DefaultEvictionAdvisor::tryFindEvictionCandidate(
- LiveInterval &VirtReg, const AllocationOrder &Order,
+ const LiveInterval &VirtReg, const AllocationOrder &Order,
uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const {
// Keep track of the cheapest interference seen so far.
EvictionCost BestCost;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
index 1f40386db8da..d57b0ca6d53d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
@@ -9,19 +9,25 @@
#ifndef LLVM_CODEGEN_REGALLOCEVICTIONADVISOR_H
#define LLVM_CODEGEN_REGALLOCEVICTIONADVISOR_H
-#include "AllocationOrder.h"
-#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallSet.h"
-#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveIntervals.h"
-#include "llvm/CodeGen/LiveRegMatrix.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/Register.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/MC/MCRegister.h"
#include "llvm/Pass.h"
namespace llvm {
+class AllocationOrder;
+class LiveInterval;
+class LiveIntervals;
+class LiveRegMatrix;
+class MachineFunction;
+class MachineRegisterInfo;
+class RegisterClassInfo;
+class TargetRegisterInfo;
+class VirtRegMap;
using SmallVirtRegSet = SmallSet<Register, 16>;
@@ -99,15 +105,14 @@ public:
/// Find a physical register that can be freed by evicting the FixedRegisters,
/// or return NoRegister. The eviction decision is assumed to be correct (i.e.
/// no fixed live ranges are evicted) and profitable.
- virtual MCRegister
- tryFindEvictionCandidate(LiveInterval &VirtReg, const AllocationOrder &Order,
- uint8_t CostPerUseLimit,
- const SmallVirtRegSet &FixedRegisters) const = 0;
+ virtual MCRegister tryFindEvictionCandidate(
+ const LiveInterval &VirtReg, const AllocationOrder &Order,
+ uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const = 0;
/// Find out if we can evict the live ranges occupying the given PhysReg,
/// which is a hint (preferred register) for VirtReg.
virtual bool
- canEvictHintInterference(LiveInterval &VirtReg, MCRegister PhysReg,
+ canEvictHintInterference(const LiveInterval &VirtReg, MCRegister PhysReg,
const SmallVirtRegSet &FixedRegisters) const = 0;
/// Returns true if the given \p PhysReg is a callee saved register and has
@@ -115,9 +120,9 @@ public:
bool isUnusedCalleeSavedReg(MCRegister PhysReg) const;
protected:
- RegAllocEvictionAdvisor(MachineFunction &MF, const RAGreedy &RA);
+ RegAllocEvictionAdvisor(const MachineFunction &MF, const RAGreedy &RA);
- Register canReassign(LiveInterval &VirtReg, Register PrevReg) const;
+ Register canReassign(const LiveInterval &VirtReg, Register PrevReg) const;
// Get the upper limit of elements in the given Order we need to analyze.
// TODO: this is a heuristic; we could consider learning it.
@@ -173,7 +178,7 @@ public:
/// Get an advisor for the given context (i.e. machine function, etc)
virtual std::unique_ptr<RegAllocEvictionAdvisor>
- getAdvisor(MachineFunction &MF, const RAGreedy &RA) = 0;
+ getAdvisor(const MachineFunction &MF, const RAGreedy &RA) = 0;
AdvisorMode getAdvisorMode() const { return Mode; }
protected:
@@ -200,19 +205,20 @@ RegAllocEvictionAdvisorAnalysis *createDevelopmentModeAdvisor();
// out of RegAllocGreedy.cpp
class DefaultEvictionAdvisor : public RegAllocEvictionAdvisor {
public:
- DefaultEvictionAdvisor(MachineFunction &MF, const RAGreedy &RA)
+ DefaultEvictionAdvisor(const MachineFunction &MF, const RAGreedy &RA)
: RegAllocEvictionAdvisor(MF, RA) {}
private:
- MCRegister tryFindEvictionCandidate(LiveInterval &, const AllocationOrder &,
- uint8_t,
+ MCRegister tryFindEvictionCandidate(const LiveInterval &,
+ const AllocationOrder &, uint8_t,
const SmallVirtRegSet &) const override;
- bool canEvictHintInterference(LiveInterval &, MCRegister,
+ bool canEvictHintInterference(const LiveInterval &, MCRegister,
const SmallVirtRegSet &) const override;
- bool canEvictInterferenceBasedOnCost(LiveInterval &, MCRegister, bool,
+ bool canEvictInterferenceBasedOnCost(const LiveInterval &, MCRegister, bool,
EvictionCost &,
const SmallVirtRegSet &) const;
- bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool) const;
+ bool shouldEvict(const LiveInterval &A, bool, const LiveInterval &B,
+ bool) const;
};
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
index 6653145d3d2a..72ceaa768803 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -35,14 +35,9 @@
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/IR/DebugLoc.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/InitializePasses.h"
-#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -364,7 +359,16 @@ bool RegAllocFast::mayLiveOut(Register VirtReg) {
// If this block loops back to itself, it is necessary to check whether the
// use comes after the def.
if (MBB->isSuccessor(MBB)) {
- SelfLoopDef = MRI->getUniqueVRegDef(VirtReg);
+ // Find the first def in the self loop MBB.
+ for (const MachineInstr &DefInst : MRI->def_instructions(VirtReg)) {
+ if (DefInst.getParent() != MBB) {
+ MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
+ return true;
+ } else {
+ if (!SelfLoopDef || dominates(*MBB, DefInst.getIterator(), SelfLoopDef))
+ SelfLoopDef = &DefInst;
+ }
+ }
if (!SelfLoopDef) {
MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
return true;
@@ -1117,6 +1121,12 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
RegMasks.clear();
BundleVirtRegsMap.clear();
+ auto TiedOpIsUndef = [&](const MachineOperand &MO, unsigned Idx) {
+ assert(MO.isTied());
+ unsigned TiedIdx = MI.findTiedOperandIdx(Idx);
+ const MachineOperand &TiedMO = MI.getOperand(TiedIdx);
+ return TiedMO.isUndef();
+ };
// Scan for special cases; Apply pre-assigned register defs to state.
bool HasPhysRegUse = false;
bool HasRegMask = false;
@@ -1124,7 +1134,8 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
bool HasDef = false;
bool HasEarlyClobber = false;
bool NeedToAssignLiveThroughs = false;
- for (MachineOperand &MO : MI.operands()) {
+ for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
+ MachineOperand &MO = MI.getOperand(I);
if (MO.isReg()) {
Register Reg = MO.getReg();
if (Reg.isVirtual()) {
@@ -1135,7 +1146,8 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
HasEarlyClobber = true;
NeedToAssignLiveThroughs = true;
}
- if (MO.isTied() || (MO.getSubReg() != 0 && !MO.isUndef()))
+ if ((MO.isTied() && !TiedOpIsUndef(MO, I)) ||
+ (MO.getSubReg() != 0 && !MO.isUndef()))
NeedToAssignLiveThroughs = true;
}
} else if (Reg.isPhysical()) {
@@ -1235,7 +1247,8 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
MachineOperand &MO = MI.getOperand(OpIdx);
LLVM_DEBUG(dbgs() << "Allocating " << MO << '\n');
unsigned Reg = MO.getReg();
- if (MO.isEarlyClobber() || MO.isTied() ||
+ if (MO.isEarlyClobber() ||
+ (MO.isTied() && !TiedOpIsUndef(MO, OpIdx)) ||
(MO.getSubReg() && !MO.isUndef())) {
defineLiveThroughVirtReg(MI, OpIdx, Reg);
} else {
@@ -1258,7 +1271,8 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// Free registers occupied by defs.
// Iterate operands in reverse order, so we see the implicit super register
// defs first (we added them earlier in case of <def,read-undef>).
- for (MachineOperand &MO : llvm::reverse(MI.operands())) {
+ for (signed I = MI.getNumOperands() - 1; I >= 0; --I) {
+ MachineOperand &MO = MI.getOperand(I);
if (!MO.isReg() || !MO.isDef())
continue;
@@ -1273,7 +1287,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
"tied def assigned to clobbered register");
// Do not free tied operands and early clobbers.
- if (MO.isTied() || MO.isEarlyClobber())
+ if ((MO.isTied() && !TiedOpIsUndef(MO, I)) || MO.isEarlyClobber())
continue;
Register Reg = MO.getReg();
if (!Reg)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 7870574df5b2..2efb98ae200d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -21,9 +21,7 @@
#include "SplitKit.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
-#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
@@ -62,6 +60,7 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
@@ -71,13 +70,9 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
-#include <memory>
-#include <queue>
-#include <tuple>
#include <utility>
using namespace llvm;
@@ -127,11 +122,18 @@ CSRFirstTimeCost("regalloc-csr-first-time-cost",
cl::desc("Cost for first time use of callee-saved register."),
cl::init(0), cl::Hidden);
-static cl::opt<bool> ConsiderLocalIntervalCost(
- "consider-local-interval-cost", cl::Hidden,
- cl::desc("Consider the cost of local intervals created by a split "
- "candidate when choosing the best split candidate."),
- cl::init(false));
+static cl::opt<unsigned long> GrowRegionComplexityBudget(
+ "grow-region-complexity-budget",
+ cl::desc("growRegion() does not scale with the number of BB edges, so "
+ "limit its budget and bail out once we reach the limit."),
+ cl::init(10000), cl::Hidden);
+
+static cl::opt<bool> GreedyRegClassPriorityTrumpsGlobalness(
+ "greedy-regclass-priority-trumps-globalness",
+ cl::desc("Change the greedy register allocator's live range priority "
+ "calculation to make the AllocationPriority of the register class "
+ "more important then whether the range is global"),
+ cl::Hidden);
static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
createGreedyRegisterAllocator);
@@ -277,9 +279,9 @@ void RAGreedy::releaseMemory() {
GlobalCand.clear();
}
-void RAGreedy::enqueueImpl(LiveInterval *LI) { enqueue(Queue, LI); }
+void RAGreedy::enqueueImpl(const LiveInterval *LI) { enqueue(Queue, LI); }
-void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
+void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) {
// Prioritize live ranges by size, assigning larger ranges first.
// The queue holds (size, reg) pairs.
const unsigned Size = LI->getSize();
@@ -308,8 +310,10 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
// prevents excessive spilling in pathological cases.
bool ReverseLocal = TRI->reverseLocalAssignment();
const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
- bool ForceGlobal = !ReverseLocal &&
- (Size / SlotIndex::InstrDist) > (2 * RCI.getNumAllocatableRegs(&RC));
+ bool ForceGlobal =
+ !ReverseLocal && (Size / SlotIndex::InstrDist) >
+ (2 * RegClassInfo.getNumAllocatableRegs(&RC));
+ unsigned GlobalBit = 0;
if (Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
LIS->intervalIsInOneMBB(*LI)) {
@@ -324,15 +328,18 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
// large blocks on targets with many physical registers.
Prio = Indexes->getZeroIndex().getInstrDistance(LI->endIndex());
}
- Prio |= RC.AllocationPriority << 24;
} else {
// Allocate global and split ranges in long->short order. Long ranges that
// don't fit should be spilled (or split) ASAP so they don't create
// interference. Mark a bit to prioritize global above local ranges.
- Prio = (1u << 29) + Size;
-
- Prio |= RC.AllocationPriority << 24;
+ Prio = Size;
+ GlobalBit = 1;
}
+ if (RegClassPriorityTrumpsGlobalness)
+ Prio |= RC.AllocationPriority << 25 | GlobalBit << 24;
+ else
+ Prio |= GlobalBit << 29 | RC.AllocationPriority << 24;
+
// Mark a higher bit to prioritize global and local above RS_Split.
Prio |= (1u << 31);
@@ -345,9 +352,9 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
CurQueue.push(std::make_pair(Prio, ~Reg));
}
-LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }
+const LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }
-LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
+const LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
if (CurQueue.empty())
return nullptr;
LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second);
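
The enqueue changes above repack RAGreedy's 32-bit priority word so a policy flag decides whether the register class's AllocationPriority outranks the global-range bit. A standalone sketch of the packing, with field positions taken from the shifts in the hunk:

```cpp
#include <cstdint>

// Bit 31 marks ranges above RS_Split; below it, either the class priority
// or the global bit sits higher depending on the policy flag.
uint32_t makePriority(uint32_t SizeBits, uint32_t ClassPrio, bool Global,
                      bool ClassPrioTrumpsGlobalness) {
  uint32_t Prio = SizeBits;
  uint32_t GlobalBit = Global ? 1u : 0u;
  if (ClassPrioTrumpsGlobalness)
    Prio |= ClassPrio << 25 | GlobalBit << 24;
  else
    Prio |= GlobalBit << 29 | ClassPrio << 24;
  return Prio | (1u << 31);
}
```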
@@ -360,10 +367,10 @@ LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
//===----------------------------------------------------------------------===//
/// tryAssign - Try to assign VirtReg to an available register.
-MCRegister RAGreedy::tryAssign(LiveInterval &VirtReg,
- AllocationOrder &Order,
- SmallVectorImpl<Register> &NewVRegs,
- const SmallVirtRegSet &FixedRegisters) {
+MCRegister RAGreedy::tryAssign(const LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ SmallVectorImpl<Register> &NewVRegs,
+ const SmallVirtRegSet &FixedRegisters) {
MCRegister PhysReg;
for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
assert(*I);
@@ -413,7 +420,7 @@ MCRegister RAGreedy::tryAssign(LiveInterval &VirtReg,
// Interference eviction
//===----------------------------------------------------------------------===//
-Register RegAllocEvictionAdvisor::canReassign(LiveInterval &VirtReg,
+Register RegAllocEvictionAdvisor::canReassign(const LiveInterval &VirtReg,
Register PrevReg) const {
auto Order =
AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
@@ -440,94 +447,11 @@ Register RegAllocEvictionAdvisor::canReassign(LiveInterval &VirtReg,
return PhysReg;
}
-/// Return true if all interferences between VirtReg and PhysReg between
-/// Start and End can be evicted.
-///
-/// \param VirtReg Live range that is about to be assigned.
-/// \param PhysReg Desired register for assignment.
-/// \param Start Start of range to look for interferences.
-/// \param End End of range to look for interferences.
-/// \param MaxCost Only look for cheaper candidates and update with new cost
-/// when returning true.
-/// \return True when interference can be evicted cheaper than MaxCost.
-bool RAGreedy::canEvictInterferenceInRange(const LiveInterval &VirtReg,
- MCRegister PhysReg, SlotIndex Start,
- SlotIndex End,
- EvictionCost &MaxCost) const {
- EvictionCost Cost;
-
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
- LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
-
- // Check if any interfering live range is heavier than MaxWeight.
- for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
- // Check if the interference overlaps the segment of interest.
- if (!Intf->overlaps(Start, End))
- continue;
-
- // Cannot evict non virtual reg interference.
- if (!Register::isVirtualRegister(Intf->reg()))
- return false;
- // Never evict spill products. They cannot split or spill.
- if (ExtraInfo->getStage(*Intf) == RS_Done)
- return false;
-
- // Would this break a satisfied hint?
- bool BreaksHint = VRM->hasPreferredPhys(Intf->reg());
- // Update eviction cost.
- Cost.BrokenHints += BreaksHint;
- Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight());
- // Abort if this would be too expensive.
- if (!(Cost < MaxCost))
- return false;
- }
- }
-
- if (Cost.MaxWeight == 0)
- return false;
-
- MaxCost = Cost;
- return true;
-}
-
-/// Return the physical register that will be best
-/// candidate for eviction by a local split interval that will be created
-/// between Start and End.
-///
-/// \param Order The allocation order
-/// \param VirtReg Live range that is about to be assigned.
-/// \param Start Start of range to look for interferences
-/// \param End End of range to look for interferences
-/// \param BestEvictweight The eviction cost of that eviction
-/// \return The PhysReg which is the best candidate for eviction and the
-/// eviction cost in BestEvictweight
-MCRegister RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order,
- const LiveInterval &VirtReg,
- SlotIndex Start, SlotIndex End,
- float *BestEvictweight) const {
- EvictionCost BestEvictCost;
- BestEvictCost.setMax();
- BestEvictCost.MaxWeight = VirtReg.weight();
- MCRegister BestEvicteePhys;
-
- // Go over all physical registers and find the best candidate for eviction
- for (MCRegister PhysReg : Order.getOrder()) {
-
- if (!canEvictInterferenceInRange(VirtReg, PhysReg, Start, End,
- BestEvictCost))
- continue;
-
- // Best so far.
- BestEvicteePhys = PhysReg;
- }
- *BestEvictweight = BestEvictCost.MaxWeight;
- return BestEvicteePhys;
-}
-
/// evictInterference - Evict any interfering registers that prevent VirtReg
/// from being assigned to Physreg. This assumes that canEvictInterference
/// returned true.
-void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg,
+void RAGreedy::evictInterference(const LiveInterval &VirtReg,
+ MCRegister PhysReg,
SmallVectorImpl<Register> &NewVRegs) {
// Make sure that VirtReg has a cascade number, and assign that cascade
// number to every evicted register. These live ranges can then only be
@@ -538,25 +462,23 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg,
<< " interference: Cascade " << Cascade << '\n');
// Collect all interfering virtregs first.
- SmallVector<LiveInterval*, 8> Intfs;
+ SmallVector<const LiveInterval *, 8> Intfs;
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
// We usually have the interfering VRegs cached so collectInterferingVRegs()
// should be fast; we may need to recalculate when different physregs
// overlap the same register unit, so we had different SubRanges queried
// against it.
- ArrayRef<LiveInterval*> IVR = Q.interferingVRegs();
+ ArrayRef<const LiveInterval *> IVR = Q.interferingVRegs();
Intfs.append(IVR.begin(), IVR.end());
}
// Evict them second. This will invalidate the queries.
- for (LiveInterval *Intf : Intfs) {
+ for (const LiveInterval *Intf : Intfs) {
// The same VirtReg may be present in multiple RegUnits. Skip duplicates.
if (!VRM->hasPhys(Intf->reg()))
continue;
- LastEvicted.addEviction(PhysReg, VirtReg.reg(), Intf->reg());
-
Matrix->unassign(*Intf);
assert((ExtraInfo->getCascade(Intf->reg()) < Cascade ||
VirtReg.isSpillable() < Intf->isSpillable()) &&
@@ -624,7 +546,8 @@ bool RegAllocEvictionAdvisor::canAllocatePhysReg(unsigned CostPerUseLimit,
/// @param VirtReg Currently unassigned virtual register.
/// @param Order Physregs to try.
/// @return Physreg to assign VirtReg, or 0.
-MCRegister RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order,
+MCRegister RAGreedy::tryEvict(const LiveInterval &VirtReg,
+ AllocationOrder &Order,
SmallVectorImpl<Register> &NewVRegs,
uint8_t CostPerUseLimit,
const SmallVirtRegSet &FixedRegisters) {
@@ -782,12 +705,17 @@ bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
unsigned Visited = 0;
#endif
+ unsigned long Budget = GrowRegionComplexityBudget;
while (true) {
ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
// Find new through blocks in the periphery of PrefRegBundles.
for (unsigned Bundle : NewBundles) {
// Look at all blocks connected to Bundle in the full graph.
ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
+ // Limit compilation time by bailing out after we use all our budget.
+ if (Blocks.size() >= Budget)
+ return false;
+ Budget -= Blocks.size();
for (unsigned Block : Blocks) {
if (!Todo.test(Block))
continue;
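
growRegion() gains an explicit complexity budget above: each bundle expansion spends budget proportional to its block count, and the search bails out once the budget is exhausted instead of scaling with pathological CFGs. A generic sketch of the pattern:

```cpp
#include <vector>

// Cap total work across an exploration loop; return false when the budget
// runs out so the caller can fall back to a cheaper strategy.
bool exploreWithBudget(const std::vector<std::vector<int>> &Frontiers,
                       unsigned long Budget) {
  for (const std::vector<int> &Blocks : Frontiers) {
    if (Blocks.size() >= Budget)
      return false;          // over budget: bail before visiting
    Budget -= Blocks.size(); // spend proportionally to the work done
    for (int Block : Blocks) {
      (void)Block; // ... visit Block ...
    }
  }
  return true;
}
```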
@@ -887,147 +815,14 @@ BlockFrequency RAGreedy::calcSpillCost() {
return Cost;
}
-/// Check if splitting Evictee will create a local split interval in
-/// basic block number BBNumber that may cause a bad eviction chain. This is
-/// intended to prevent bad eviction sequences like:
-/// movl %ebp, 8(%esp) # 4-byte Spill
-/// movl %ecx, %ebp
-/// movl %ebx, %ecx
-/// movl %edi, %ebx
-/// movl %edx, %edi
-/// cltd
-/// idivl %esi
-/// movl %edi, %edx
-/// movl %ebx, %edi
-/// movl %ecx, %ebx
-/// movl %ebp, %ecx
-/// movl 16(%esp), %ebp # 4 - byte Reload
-///
-/// Such sequences are created in 2 scenarios:
-///
-/// Scenario #1:
-/// %0 is evicted from physreg0 by %1.
-/// Evictee %0 is intended for region splitting with split candidate
-/// physreg0 (the reg %0 was evicted from).
-/// Region splitting creates a local interval because of interference with the
-/// evictor %1 (normally region splitting creates 2 intervals, the "by reg"
-/// and "by stack" intervals and local interval created when interference
-/// occurs).
-/// One of the split intervals ends up evicting %2 from physreg1.
-/// Evictee %2 is intended for region splitting with split candidate
-/// physreg1.
-/// One of the split intervals ends up evicting %3 from physreg2, etc.
-///
-/// Scenario #2
-/// %0 is evicted from physreg0 by %1.
-/// %2 is evicted from physreg2 by %3 etc.
-/// Evictee %0 is intended for region splitting with split candidate
-/// physreg1.
-/// Region splitting creates a local interval because of interference with the
-/// evictor %1.
-/// One of the split intervals ends up evicting back original evictor %1
-/// from physreg0 (the reg %0 was evicted from).
-/// Another evictee %2 is intended for region splitting with split candidate
-/// physreg1.
-/// One of the split intervals ends up evicting %3 from physreg2, etc.
-///
-/// \param Evictee The register considered to be split.
-/// \param Cand The split candidate that determines the physical register
-/// we are splitting for and the interferences.
-/// \param BBNumber The number of a BB for which the region split process will
-/// create a local split interval.
-/// \param Order The physical registers that may get evicted by a split
-/// artifact of Evictee.
-/// \return True if splitting Evictee may cause a bad eviction chain, false
-/// otherwise.
-bool RAGreedy::splitCanCauseEvictionChain(Register Evictee,
- GlobalSplitCandidate &Cand,
- unsigned BBNumber,
- const AllocationOrder &Order) {
- EvictionTrack::EvictorInfo VregEvictorInfo = LastEvicted.getEvictor(Evictee);
- unsigned Evictor = VregEvictorInfo.first;
- MCRegister PhysReg = VregEvictorInfo.second;
-
- // No actual evictor.
- if (!Evictor || !PhysReg)
- return false;
-
- float MaxWeight = 0;
- MCRegister FutureEvictedPhysReg =
- getCheapestEvicteeWeight(Order, LIS->getInterval(Evictee),
- Cand.Intf.first(), Cand.Intf.last(), &MaxWeight);
-
- // The bad eviction chain occurs when either the split candidate is the
- // evicting reg or one of the split artifacts will evict the evicting reg.
- if ((PhysReg != Cand.PhysReg) && (PhysReg != FutureEvictedPhysReg))
- return false;
-
- Cand.Intf.moveToBlock(BBNumber);
-
- // Check to see if the Evictor contains interference (with Evictee) in the
- // given BB. If so, this interference caused the eviction of Evictee from
- // PhysReg. This suggests that we will create a local interval during the
- // region split to avoid this interference. This local interval may cause a bad
- // eviction chain.
- if (!LIS->hasInterval(Evictor))
- return false;
- LiveInterval &EvictorLI = LIS->getInterval(Evictor);
- if (EvictorLI.FindSegmentContaining(Cand.Intf.first()) == EvictorLI.end())
- return false;
-
- // Now, check to see if the local interval we will create is going to be
- // expensive enough to evict somebody. If so, this may cause a bad eviction
- // chain.
- float splitArtifactWeight =
- VRAI->futureWeight(LIS->getInterval(Evictee),
- Cand.Intf.first().getPrevIndex(), Cand.Intf.last());
- if (splitArtifactWeight >= 0 && splitArtifactWeight < MaxWeight)
- return false;
-
- return true;
-}
-
-/// Check if splitting VirtRegToSplit will create a local split interval
-/// in basic block number BBNumber that may cause a spill.
-///
-/// \param VirtRegToSplit The register considered to be split.
-/// \param Cand The split candidate that determines the physical
-/// register we are splitting for and the interferences.
-/// \param BBNumber The number of a BB for which the region split process
-/// will create a local split interval.
-/// \param Order The physical registers that may get evicted by a
-/// split artifact of VirtRegToSplit.
-/// \return True if splitting VirtRegToSplit may cause a spill, false
-/// otherwise.
-bool RAGreedy::splitCanCauseLocalSpill(unsigned VirtRegToSplit,
- GlobalSplitCandidate &Cand,
- unsigned BBNumber,
- const AllocationOrder &Order) {
- Cand.Intf.moveToBlock(BBNumber);
-
- // Check if the local interval will find a non-interfering assignment.
- for (auto PhysReg : Order.getOrder()) {
- if (!Matrix->checkInterference(Cand.Intf.first().getPrevIndex(),
- Cand.Intf.last(), PhysReg))
- return false;
- }
-
- // The local interval is not able to find a non-interfering assignment
- // and is not able to evict a less worthy interval; therefore, it can cause
- // a spill.
- return true;
-}
-
/// calcGlobalSplitCost - Return the global split cost of following the split
/// pattern in LiveBundles. This cost should be added to the local cost of the
/// interference pattern in SplitConstraints.
///
BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
- const AllocationOrder &Order,
- bool *CanCauseEvictionChain) {
+ const AllocationOrder &Order) {
BlockFrequency GlobalCost = 0;
const BitVector &LiveBundles = Cand.LiveBundles;
- Register VirtRegToSplit = SA->getParent().reg();
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
for (unsigned I = 0; I != UseBlocks.size(); ++I) {
const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
@@ -1037,29 +832,6 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
unsigned Ins = 0;
Cand.Intf.moveToBlock(BC.Number);
- // Check whether a local interval is going to be created during the region
- // split. Calculate the advanced split cost (cost of local intervals) if the
- // option is enabled.
- if (EnableAdvancedRASplitCost && Cand.Intf.hasInterference() && BI.LiveIn &&
- BI.LiveOut && RegIn && RegOut) {
-
- if (CanCauseEvictionChain &&
- splitCanCauseEvictionChain(VirtRegToSplit, Cand, BC.Number, Order)) {
- // This interference causes our eviction from this assignment, we might
- // evict somebody else and eventually someone will spill, add that cost.
- // See splitCanCauseEvictionChain for detailed description of scenarios.
- GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
- GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
-
- *CanCauseEvictionChain = true;
-
- } else if (splitCanCauseLocalSpill(VirtRegToSplit, Cand, BC.Number,
- Order)) {
- // This interference causes local interval to spill, add that cost.
- GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
- GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
- }
- }
if (BI.LiveIn)
Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
@@ -1080,20 +852,6 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
if (Cand.Intf.hasInterference()) {
GlobalCost += SpillPlacer->getBlockFrequency(Number);
GlobalCost += SpillPlacer->getBlockFrequency(Number);
-
- // Check whether a local interval is going to be created during the
- // region split.
- if (EnableAdvancedRASplitCost && CanCauseEvictionChain &&
- splitCanCauseEvictionChain(VirtRegToSplit, Cand, Number, Order)) {
- // This interference causes our eviction from this assignment, we might
- // evict somebody else, add that cost.
- // See splitCanCauseEvictionChain for detailed description of
- // scenarios.
- GlobalCost += SpillPlacer->getBlockFrequency(Number);
- GlobalCost += SpillPlacer->getBlockFrequency(Number);
-
- *CanCauseEvictionChain = true;
- }
}
continue;
}
@@ -1253,7 +1011,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
MF->verify(this, "After splitting live range around region");
}
-MCRegister RAGreedy::tryRegionSplit(LiveInterval &VirtReg,
+MCRegister RAGreedy::tryRegionSplit(const LiveInterval &VirtReg,
AllocationOrder &Order,
SmallVectorImpl<Register> &NewVRegs) {
if (!TRI->shouldRegionSplitForVirtReg(*MF, VirtReg))
@@ -1276,19 +1034,8 @@ MCRegister RAGreedy::tryRegionSplit(LiveInterval &VirtReg,
MBFI->printBlockFreq(dbgs(), BestCost) << '\n');
}
- bool CanCauseEvictionChain = false;
- unsigned BestCand =
- calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands,
- false /*IgnoreCSR*/, &CanCauseEvictionChain);
-
- // Split candidates with compact regions can cause a bad eviction sequence.
- // See splitCanCauseEvictionChain for detailed description of scenarios.
- // To avoid it, we need to compare the cost with the spill cost and not the
- // current max frequency.
- if (HasCompact && (BestCost > SpillCost) && (BestCand != NoCand) &&
- CanCauseEvictionChain) {
- return MCRegister::NoRegister;
- }
+ unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
+ NumCands, false /*IgnoreCSR*/);
// No solutions found, fall back to single block splitting.
if (!HasCompact && BestCand == NoCand)
@@ -1297,11 +1044,11 @@ MCRegister RAGreedy::tryRegionSplit(LiveInterval &VirtReg,
return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs);
}
-unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
+unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg,
AllocationOrder &Order,
BlockFrequency &BestCost,
- unsigned &NumCands, bool IgnoreCSR,
- bool *CanCauseEvictionChain) {
+ unsigned &NumCands,
+ bool IgnoreCSR) {
unsigned BestCand = NoCand;
for (MCPhysReg PhysReg : Order) {
assert(PhysReg);
@@ -1364,8 +1111,7 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
continue;
}
- bool HasEvictionChain = false;
- Cost += calcGlobalSplitCost(Cand, Order, &HasEvictionChain);
+ Cost += calcGlobalSplitCost(Cand, Order);
LLVM_DEBUG({
dbgs() << ", total = ";
MBFI->printBlockFreq(dbgs(), Cost) << " with bundles";
@@ -1376,28 +1122,14 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
if (Cost < BestCost) {
BestCand = NumCands;
BestCost = Cost;
- // See splitCanCauseEvictionChain for detailed description of bad
- // eviction chain scenarios.
- if (CanCauseEvictionChain)
- *CanCauseEvictionChain = HasEvictionChain;
}
++NumCands;
}
- if (CanCauseEvictionChain && BestCand != NoCand) {
- // See splitCanCauseEvictionChain for detailed description of bad
- // eviction chain scenarios.
- LLVM_DEBUG(dbgs() << "Best split candidate of vreg "
- << printReg(VirtReg.reg(), TRI) << " may ");
- if (!(*CanCauseEvictionChain))
- LLVM_DEBUG(dbgs() << "not ");
- LLVM_DEBUG(dbgs() << "cause bad eviction chain\n");
- }
-
return BestCand;
}
-unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
+unsigned RAGreedy::doRegionSplit(const LiveInterval &VirtReg, unsigned BestCand,
bool HasCompact,
SmallVectorImpl<Register> &NewVRegs) {
SmallVector<unsigned, 8> UsedCands;
@@ -1444,7 +1176,8 @@ unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
/// tryBlockSplit - Split a global live range around every block with uses. This
/// creates a lot of local live ranges, that will be split by tryLocalSplit if
/// they don't allocate.
-unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+unsigned RAGreedy::tryBlockSplit(const LiveInterval &VirtReg,
+ AllocationOrder &Order,
SmallVectorImpl<Register> &NewVRegs) {
assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
Register Reg = VirtReg.reg();
@@ -1507,9 +1240,9 @@ static unsigned getNumAllocatableRegsForConstraints(
/// be moved to a larger register class.
///
/// This is similar to spilling to a larger register class.
-unsigned
-RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
- SmallVectorImpl<Register> &NewVRegs) {
+unsigned RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ SmallVectorImpl<Register> &NewVRegs) {
const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
// There is no point to this if there are no larger sub-classes.
if (!RegClassInfo.isProperSubClass(CurRC))
@@ -1529,7 +1262,8 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
const TargetRegisterClass *SuperRC =
TRI->getLargestLegalSuperClass(CurRC, *MF);
- unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC);
+ unsigned SuperRCNumAllocatableRegs =
+ RegClassInfo.getNumAllocatableRegs(SuperRC);
// Split around every non-copy instruction if this split will relax
// the constraints on the virtual register.
// Otherwise, splitting just inserts uncoalescable copies that do not help
@@ -1539,7 +1273,7 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
if (MI->isFullCopy() ||
SuperRCNumAllocatableRegs ==
getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC,
- TII, TRI, RCI)) {
+ TII, TRI, RegClassInfo)) {
LLVM_DEBUG(dbgs() << " skip:\t" << Use << '\t' << *MI);
continue;
}
@@ -1649,7 +1383,8 @@ void RAGreedy::calcGapWeights(MCRegister PhysReg,
/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
/// basic block.
///
-unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+unsigned RAGreedy::tryLocalSplit(const LiveInterval &VirtReg,
+ AllocationOrder &Order,
SmallVectorImpl<Register> &NewVRegs) {
// TODO: the function currently only handles a single UseBlock; it should be
// possible to generalize.
@@ -1879,7 +1614,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
/// trySplit - Try to split VirtReg or one of its interferences, making it
/// assignable.
/// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
-unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
+unsigned RAGreedy::trySplit(const LiveInterval &VirtReg, AllocationOrder &Order,
SmallVectorImpl<Register> &NewVRegs,
const SmallVirtRegSet &FixedRegisters) {
// Ranges must be Split2 or less.
@@ -1928,6 +1663,18 @@ static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) {
return false;
}
+/// Return true if the existing assignment of \p Intf overlaps, but is not the
+/// same, as \p PhysReg.
+static bool assignedRegPartiallyOverlaps(const TargetRegisterInfo &TRI,
+ const VirtRegMap &VRM,
+ MCRegister PhysReg,
+ const LiveInterval &Intf) {
+ MCRegister AssignedReg = VRM.getPhys(Intf.reg());
+ if (PhysReg == AssignedReg)
+ return false;
+ return TRI.regsOverlap(PhysReg, AssignedReg);
+}
+
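For intuition, the overlap case the new helper targets arises with register tuples. A hypothetical illustration (the tuple registers named here are assumptions for this note, not taken from the patch):

// Suppose the interfering vreg is currently assigned the pair Q1_Q2 and the
// allocation candidate PhysReg is the pair Q2_Q3. The tuples are different
// registers, yet TRI.regsOverlap(Q2_Q3, Q1_Q2) is true because both contain
// Q2, so assignedRegPartiallyOverlaps() returns true and the interference
// remains a recoloring candidate: moving it to a non-overlapping tuple could
// still free PhysReg.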
/// mayRecolorAllInterferences - Check if the virtual registers that
/// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
/// recolored to free \p PhysReg.
@@ -1937,8 +1684,8 @@ static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) {
/// \p FixedRegisters contains all the virtual registers that cannot be
/// recolored.
bool RAGreedy::mayRecolorAllInterferences(
- MCRegister PhysReg, LiveInterval &VirtReg, SmallLISet &RecoloringCandidates,
- const SmallVirtRegSet &FixedRegisters) {
+ MCRegister PhysReg, const LiveInterval &VirtReg,
+ SmallLISet &RecoloringCandidates, const SmallVirtRegSet &FixedRegisters) {
const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
@@ -1952,13 +1699,21 @@ bool RAGreedy::mayRecolorAllInterferences(
CutOffInfo |= CO_Interf;
return false;
}
- for (LiveInterval *Intf : reverse(Q.interferingVRegs())) {
- // If Intf is done and sit on the same register class as VirtReg,
- // it would not be recolorable as it is in the same state as VirtReg.
- // However, if VirtReg has tied defs and Intf doesn't, then
+ for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
+ // If Intf is done and sits on the same register class as VirtReg, it
+ // would not be recolorable as it is in the same state as
+ // VirtReg. However there are at least two exceptions.
+ //
+ // If VirtReg has tied defs and Intf doesn't, then
// there is still a point in examining if it can be recolorable.
+ //
+ // Additionally, if the register class has overlapping tuple members, it
+ // may still be recolorable using a different tuple. This is more likely
+ // if the existing assignment aliases with the candidate.
+ //
if (((ExtraInfo->getStage(*Intf) == RS_Done &&
- MRI->getRegClass(Intf->reg()) == CurRC) &&
+ MRI->getRegClass(Intf->reg()) == CurRC &&
+ !assignedRegPartiallyOverlaps(*TRI, *VRM, PhysReg, *Intf)) &&
!(hasTiedDef(MRI, VirtReg.reg()) &&
!hasTiedDef(MRI, Intf->reg()))) ||
FixedRegisters.count(Intf->reg())) {
@@ -2008,18 +1763,26 @@ bool RAGreedy::mayRecolorAllInterferences(
/// (split, spill) during the process and that must be assigned.
/// \p FixedRegisters contains all the virtual registers that cannot be
/// recolored.
+///
+/// \p RecolorStack tracks the original assignments of successfully recolored
+/// registers.
+///
/// \p Depth gives the current depth of the last chance recoloring.
/// \return a physical register that can be used for VirtReg or ~0u if none
/// exists.
-unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
+unsigned RAGreedy::tryLastChanceRecoloring(const LiveInterval &VirtReg,
AllocationOrder &Order,
SmallVectorImpl<Register> &NewVRegs,
SmallVirtRegSet &FixedRegisters,
+ RecoloringStack &RecolorStack,
unsigned Depth) {
if (!TRI->shouldUseLastChanceRecoloringForVirtReg(*MF, VirtReg))
return ~0u;
LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
+
+ const ssize_t EntryStackSize = RecolorStack.size();
+
// Ranges must be Done.
assert((ExtraInfo->getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
"Last chance recoloring should really be last chance");
@@ -2035,9 +1798,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
// Set of Live intervals that will need to be recolored.
SmallLISet RecoloringCandidates;
- // Record the original mapping virtual register to physical register in case
- // the recoloring fails.
- DenseMap<Register, MCRegister> VirtRegToPhysReg;
+
 // Mark VirtReg as fixed, i.e., it will not be recolored past this point in
// this recoloring "session".
assert(!FixedRegisters.count(VirtReg.reg()));
@@ -2049,7 +1810,6 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
<< printReg(PhysReg, TRI) << '\n');
RecoloringCandidates.clear();
- VirtRegToPhysReg.clear();
CurrentNewVRegs.clear();
// It is only possible to recolor virtual register interference.
@@ -2069,18 +1829,19 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
continue;
}
- // RecoloringCandidates contains all the virtual registers that interfer
- // with VirtReg on PhysReg (or one of its aliases).
- // Enqueue them for recoloring and perform the actual recoloring.
+ // RecoloringCandidates contains all the virtual registers that interfere
+ // with VirtReg on PhysReg (or one of its aliases). Enqueue them for
+ // recoloring and perform the actual recoloring.
PQueue RecoloringQueue;
- for (LiveInterval *RC : RecoloringCandidates) {
+ for (const LiveInterval *RC : RecoloringCandidates) {
Register ItVirtReg = RC->reg();
enqueue(RecoloringQueue, RC);
assert(VRM->hasPhys(ItVirtReg) &&
"Interferences are supposed to be with allocated variables");
// Record the current allocation.
- VirtRegToPhysReg[ItVirtReg] = VRM->getPhys(ItVirtReg);
+ RecolorStack.push_back(std::make_pair(RC, VRM->getPhys(ItVirtReg)));
+
// unset the related struct.
Matrix->unassign(*RC);
}
@@ -2095,7 +1856,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
// at this point for the next physical register.
SmallVirtRegSet SaveFixedRegisters(FixedRegisters);
if (tryRecoloringCandidates(RecoloringQueue, CurrentNewVRegs,
- FixedRegisters, Depth)) {
+ FixedRegisters, RecolorStack, Depth)) {
// Push the queued vregs into the main queue.
for (Register NewVReg : CurrentNewVRegs)
NewVRegs.push_back(NewVReg);
@@ -2122,13 +1883,31 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
NewVRegs.push_back(R);
}
- for (LiveInterval *RC : RecoloringCandidates) {
- Register ItVirtReg = RC->reg();
- if (VRM->hasPhys(ItVirtReg))
- Matrix->unassign(*RC);
- MCRegister ItPhysReg = VirtRegToPhysReg[ItVirtReg];
- Matrix->assign(*RC, ItPhysReg);
+ // Roll back our unsuccessful recoloring. Also roll back any successful
+ // recolorings in any recursive recoloring attempts, since it's possible
+ // they would have introduced conflicts with assignments we will be
+ // restoring further up the stack. Perform all unassignments prior to
+ // reassigning, since sub-recolorings may have conflicted with the registers
+ // we are going to restore to their original assignments.
+ for (ssize_t I = RecolorStack.size() - 1; I >= EntryStackSize; --I) {
+ const LiveInterval *LI;
+ MCRegister PhysReg;
+ std::tie(LI, PhysReg) = RecolorStack[I];
+
+ if (VRM->hasPhys(LI->reg()))
+ Matrix->unassign(*LI);
}
+
+ for (size_t I = EntryStackSize; I != RecolorStack.size(); ++I) {
+ const LiveInterval *LI;
+ MCRegister PhysReg;
+ std::tie(LI, PhysReg) = RecolorStack[I];
+ if (!LI->empty() && !MRI->reg_nodbg_empty(LI->reg()))
+ Matrix->assign(*LI, PhysReg);
+ }
+
+ // Pop the stack of recoloring attempts.
+ RecolorStack.resize(EntryStackSize);
}
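The two-pass rollback above is easier to follow on a concrete stack; a small worked example (vreg and physreg names are purely illustrative):

// entry:   RecolorStack = [(%a, R0)]                 -> EntryStackSize == 1
// attempt: this call pushes (%b, R1) and (%c, R2); a recursive recoloring
//          attempt pushes (%d, R3)
// failure: pass 1 walks I = 3, 2, 1 and unassigns %d, %c, %b wherever they
//          currently hold a physreg; pass 2 walks I = 1, 2, 3 and restores
//          %b -> R1, %c -> R2, %d -> R3; the stack is then resized back to
//          [(%a, R0)].
// Unassigning everything before reassigning matters because a sub-recoloring
// may have parked one vreg on a register another entry is about to reclaim.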
 // Last chance recoloring did not work either, give up.
@@ -2146,12 +1925,13 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
SmallVectorImpl<Register> &NewVRegs,
SmallVirtRegSet &FixedRegisters,
+ RecoloringStack &RecolorStack,
unsigned Depth) {
while (!RecoloringQueue.empty()) {
- LiveInterval *LI = dequeue(RecoloringQueue);
+ const LiveInterval *LI = dequeue(RecoloringQueue);
LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
- MCRegister PhysReg =
- selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1);
+ MCRegister PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters,
+ RecolorStack, Depth + 1);
// When splitting happens, the live-range may actually be empty.
 // In that case, it is okay to continue the recoloring even
// if we did not find an alternative color for it. Indeed,
@@ -2178,12 +1958,14 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
// Main Entry Point
//===----------------------------------------------------------------------===//
-MCRegister RAGreedy::selectOrSplit(LiveInterval &VirtReg,
+MCRegister RAGreedy::selectOrSplit(const LiveInterval &VirtReg,
SmallVectorImpl<Register> &NewVRegs) {
CutOffInfo = CO_None;
LLVMContext &Ctx = MF->getFunction().getContext();
SmallVirtRegSet FixedRegisters;
- MCRegister Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters);
+ RecoloringStack RecolorStack;
+ MCRegister Reg =
+ selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters, RecolorStack);
if (Reg == ~0U && (CutOffInfo != CO_None)) {
uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
if (CutOffEncountered == CO_Depth)
@@ -2208,10 +1990,9 @@ MCRegister RAGreedy::selectOrSplit(LiveInterval &VirtReg,
/// Spilling a live range in the cold path can have lower cost than using
/// the CSR for the first time. Returns the physical register if we decide
/// to use the CSR; otherwise return 0.
-MCRegister
-RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order,
- MCRegister PhysReg, uint8_t &CostPerUseLimit,
- SmallVectorImpl<Register> &NewVRegs) {
+MCRegister RAGreedy::tryAssignCSRFirstTime(
+ const LiveInterval &VirtReg, AllocationOrder &Order, MCRegister PhysReg,
+ uint8_t &CostPerUseLimit, SmallVectorImpl<Register> &NewVRegs) {
if (ExtraInfo->getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
// We choose spill over using the CSR for the first time if the spill cost
// is lower than CSRCost.
@@ -2243,7 +2024,7 @@ RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order,
return PhysReg;
}
-void RAGreedy::aboutToRemoveInterval(LiveInterval &LI) {
+void RAGreedy::aboutToRemoveInterval(const LiveInterval &LI) {
// Do not keep invalid information around.
SetOfBrokenHints.remove(&LI);
}
@@ -2317,7 +2098,7 @@ BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
/// For a given live range, profitability is determined by the sum of the
/// frequencies of the non-identity copies it would introduce with the old
/// and new register.
-void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
+void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) {
// We have a broken hint, check if it is possible to fix it by
// reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
// some register and PhysReg may be available for the other live-ranges.
@@ -2431,7 +2212,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
/// This is likely that we can assign the same register for b, c, and d,
/// getting rid of 2 copies.
void RAGreedy::tryHintsRecoloring() {
- for (LiveInterval *LI : SetOfBrokenHints) {
+ for (const LiveInterval *LI : SetOfBrokenHints) {
assert(Register::isVirtualRegister(LI->reg()) &&
"Recoloring is possible only for virtual registers");
// Some dead defs may be around (e.g., because of debug uses).
@@ -2442,9 +2223,10 @@ void RAGreedy::tryHintsRecoloring() {
}
}
-MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
+MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg,
SmallVectorImpl<Register> &NewVRegs,
SmallVirtRegSet &FixedRegisters,
+ RecoloringStack &RecolorStack,
unsigned Depth) {
uint8_t CostPerUseLimit = uint8_t(~0u);
// First try assigning a free register.
@@ -2452,8 +2234,6 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
if (MCRegister PhysReg =
tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) {
- // If VirtReg got an assignment, the eviction info is no longer relevant.
- LastEvicted.clearEvicteeInfo(VirtReg.reg());
// When NewVRegs is not empty, we may have made decisions such as evicting
// a virtual register, go with the earlier decisions and use the physical
// register.
@@ -2488,9 +2268,6 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
// copy-related live-ranges.
if (Hint && Hint != PhysReg)
SetOfBrokenHints.insert(&VirtReg);
- // If VirtReg evicted someone, the eviction info for it as an evictee is
- // no longer relevant.
- LastEvicted.clearEvicteeInfo(VirtReg.reg());
return PhysReg;
}
@@ -2510,18 +2287,16 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
// Try splitting VirtReg or interferences.
unsigned NewVRegSizeBefore = NewVRegs.size();
Register PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters);
- if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore)) {
- // If VirtReg got split, the eviction info is no longer relevant.
- LastEvicted.clearEvicteeInfo(VirtReg.reg());
+ if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore))
return PhysReg;
- }
}
// If we couldn't allocate a register from spilling, there is probably some
// invalid inline assembly. The base class will report it.
- if (Stage >= RS_Done || !VirtReg.isSpillable())
+ if (Stage >= RS_Done || !VirtReg.isSpillable()) {
return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
- Depth);
+ RecolorStack, Depth);
+ }
// Finally spill VirtReg itself.
if ((EnableDeferredSpilling ||
@@ -2713,19 +2488,27 @@ void RAGreedy::reportStats() {
}
}
+bool RAGreedy::hasVirtRegAlloc() {
+ for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
+ Register Reg = Register::index2VirtReg(I);
+ if (MRI->reg_nodbg_empty(Reg))
+ continue;
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ if (!RC)
+ continue;
+ if (ShouldAllocateClass(*TRI, *RC))
+ return true;
+ }
+
+ return false;
+}
+
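This helper lets the pass bail out before building its relatively heavy analyses when no live virtual register belongs to a class this allocator instance handles. A sketch of the situation it guards against (the filter names are illustrative, not from the patch):

// With allocation split across passes by register class, e.g. a first greedy
// run constructed with F = onlyAllocateClassX and a second with
// allocateAllRegClasses, a function whose vregs were all handled by the first
// run makes the second run a no-op; hasVirtRegAlloc() detects that before any
// expensive setup is done.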
bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
LLVM_DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
<< "********** Function: " << mf.getName() << '\n');
MF = &mf;
- TRI = MF->getSubtarget().getRegisterInfo();
TII = MF->getSubtarget().getInstrInfo();
- RCI.runOnMachineFunction(mf);
-
- EnableAdvancedRASplitCost =
- ConsiderLocalIntervalCost.getNumOccurrences()
- ? ConsiderLocalIntervalCost
- : MF->getSubtarget().enableAdvancedRASplitCost();
if (VerifyEnabled)
MF->verify(this, "Before greedy register allocator");
@@ -2733,6 +2516,12 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
RegAllocBase::init(getAnalysis<VirtRegMap>(),
getAnalysis<LiveIntervals>(),
getAnalysis<LiveRegMatrix>());
+
+ // Early return if there is no virtual register to be allocated to a
+ // physical register.
+ if (!hasVirtRegAlloc())
+ return false;
+
Indexes = &getAnalysis<SlotIndexes>();
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
DomTree = &getAnalysis<MachineDominatorTree>();
@@ -2746,6 +2535,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
initializeCSRCost();
RegCosts = TRI->getRegisterCosts(*MF);
+ RegClassPriorityTrumpsGlobalness =
+ GreedyRegClassPriorityTrumpsGlobalness.getNumOccurrences()
+ ? GreedyRegClassPriorityTrumpsGlobalness
+ : TRI->regClassPriorityTrumpsGlobalness(*MF);
ExtraInfo.emplace();
EvictAdvisor =
@@ -2764,7 +2557,6 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
GlobalCand.resize(32); // This will grow as needed.
SetOfBrokenHints.clear();
- LastEvicted.clear();
allocatePhysRegs();
tryHintsRecoloring();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h
index e9a5fe635f26..358e74541a54 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h
@@ -12,9 +12,7 @@
#ifndef LLVM_CODEGEN_REGALLOCGREEDY_H_
#define LLVM_CODEGEN_REGALLOCGREEDY_H_
-#include "AllocationOrder.h"
#include "InterferenceCache.h"
-#include "LiveDebugVariables.h"
#include "RegAllocBase.h"
#include "RegAllocEvictionAdvisor.h"
#include "SpillPlacement.h"
@@ -23,52 +21,44 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
-#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
-#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveIntervalUnion.h"
-#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
-#include "llvm/CodeGen/LiveRegMatrix.h"
-#include "llvm/CodeGen/LiveStacks.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/Spiller.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/CodeGen/VirtRegMap.h"
-#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/BranchProbability.h"
-#include "llvm/Target/TargetMachine.h"
#include <algorithm>
-#include <cassert>
#include <cstdint>
#include <memory>
#include <queue>
-#include <tuple>
#include <utility>
namespace llvm {
+class AllocationOrder;
+class AnalysisUsage;
+class EdgeBundles;
+class LiveDebugVariables;
+class LiveIntervals;
+class LiveRegMatrix;
+class MachineBasicBlock;
+class MachineBlockFrequencyInfo;
+class MachineDominatorTree;
+class MachineLoop;
+class MachineLoopInfo;
+class MachineOptimizationRemarkEmitter;
+class MachineOptimizationRemarkMissed;
+class SlotIndex;
+class SlotIndexes;
+class TargetInstrInfo;
+class VirtRegMap;
+
class LLVM_LIBRARY_VISIBILITY RAGreedy : public MachineFunctionPass,
public RegAllocBase,
private LiveRangeEdit::Delegate {
@@ -162,15 +152,18 @@ public:
private:
// Convenient shortcuts.
using PQueue = std::priority_queue<std::pair<unsigned, unsigned>>;
- using SmallLISet = SmallPtrSet<LiveInterval *, 4>;
+ using SmallLISet = SmallPtrSet<const LiveInterval *, 4>;
+
+ // We need to track all tentative recolorings so we can roll back any
+ // successful and unsuccessful recoloring attempts.
+ using RecoloringStack =
+ SmallVector<std::pair<const LiveInterval *, MCRegister>, 8>;
// context
MachineFunction *MF;
// Shortcuts to some useful interface.
const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- RegisterClassInfo RCI;
// analyses
SlotIndexes *Indexes;
@@ -210,57 +203,6 @@ private:
static const char *const StageName[];
#endif
- /// EvictionTrack - Keeps track of past evictions in order to optimize region
- /// split decision.
- class EvictionTrack {
-
- public:
- using EvictorInfo =
- std::pair<Register /* evictor */, MCRegister /* physreg */>;
- using EvicteeInfo = llvm::DenseMap<Register /* evictee */, EvictorInfo>;
-
- private:
- /// Each Vreg that has been evicted in the last stage of selectOrSplit will
- /// be mapped to the evictor Vreg and the PhysReg it was evicted from.
- EvicteeInfo Evictees;
-
- public:
- /// Clear all eviction information.
- void clear() { Evictees.clear(); }
-
- /// Clear eviction information for the given evictee Vreg.
- /// E.g. when Vreg gets a new allocation, the old eviction info is no
- /// longer relevant.
- /// \param Evictee The evictee Vreg for whom we want to clear collected
- /// eviction info.
- void clearEvicteeInfo(Register Evictee) { Evictees.erase(Evictee); }
-
- /// Track new eviction.
- /// The Evictor vreg has evicted the Evictee vreg from Physreg.
- /// \param PhysReg The physical register Evictee was evicted from.
- /// \param Evictor The evictor Vreg that evicted Evictee.
- /// \param Evictee The evictee Vreg.
- void addEviction(MCRegister PhysReg, Register Evictor, Register Evictee) {
- Evictees[Evictee].first = Evictor;
- Evictees[Evictee].second = PhysReg;
- }
-
- /// Return the Evictor Vreg which evicted Evictee Vreg from PhysReg.
- /// \param Evictee The evictee vreg.
- /// \return The Evictor vreg which evicted Evictee vreg from PhysReg. 0 if
- /// nobody has evicted Evictee from PhysReg.
- EvictorInfo getEvictor(Register Evictee) {
- if (Evictees.count(Evictee)) {
- return Evictees[Evictee];
- }
-
- return EvictorInfo(0, 0);
- }
- };
-
- // Keeps track of past evictions in order to optimize region split decision.
- EvictionTrack LastEvicted;
-
// splitting state.
std::unique_ptr<SplitAnalysis> SA;
std::unique_ptr<SplitEditor> SE;
@@ -320,17 +262,17 @@ private:
/// Callee-save register cost, calculated once per machine function.
BlockFrequency CSRCost;
- /// Enable or not the consideration of the cost of local intervals created
- /// by a split candidate when choosing the best split candidate.
- bool EnableAdvancedRASplitCost;
-
/// Set of broken hints that may be reconciled later because of eviction.
- SmallSetVector<LiveInterval *, 8> SetOfBrokenHints;
+ SmallSetVector<const LiveInterval *, 8> SetOfBrokenHints;
/// The register cost values. This list will be recreated for each Machine
 /// Function.
ArrayRef<uint8_t> RegCosts;
+ /// Flags for the live range priority calculation, determined once per
+ /// machine function.
+ bool RegClassPriorityTrumpsGlobalness;
+
public:
RAGreedy(const RegClassFilterFunc F = allocateAllRegClasses);
@@ -341,11 +283,11 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override;
void releaseMemory() override;
Spiller &spiller() override { return *SpillerInstance; }
- void enqueueImpl(LiveInterval *LI) override;
- LiveInterval *dequeue() override;
- MCRegister selectOrSplit(LiveInterval &,
+ void enqueueImpl(const LiveInterval *LI) override;
+ const LiveInterval *dequeue() override;
+ MCRegister selectOrSplit(const LiveInterval &,
SmallVectorImpl<Register> &) override;
- void aboutToRemoveInterval(LiveInterval &) override;
+ void aboutToRemoveInterval(const LiveInterval &) override;
/// Perform register allocation.
bool runOnMachineFunction(MachineFunction &mf) override;
@@ -363,81 +305,70 @@ public:
static char ID;
private:
- MCRegister selectOrSplitImpl(LiveInterval &, SmallVectorImpl<Register> &,
- SmallVirtRegSet &, unsigned = 0);
+ MCRegister selectOrSplitImpl(const LiveInterval &,
+ SmallVectorImpl<Register> &, SmallVirtRegSet &,
+ RecoloringStack &, unsigned = 0);
bool LRE_CanEraseVirtReg(Register) override;
void LRE_WillShrinkVirtReg(Register) override;
void LRE_DidCloneVirtReg(Register, Register) override;
- void enqueue(PQueue &CurQueue, LiveInterval *LI);
- LiveInterval *dequeue(PQueue &CurQueue);
+ void enqueue(PQueue &CurQueue, const LiveInterval *LI);
+ const LiveInterval *dequeue(PQueue &CurQueue);
+ bool hasVirtRegAlloc();
BlockFrequency calcSpillCost();
bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency &);
bool addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
bool growRegion(GlobalSplitCandidate &Cand);
- bool splitCanCauseEvictionChain(Register Evictee, GlobalSplitCandidate &Cand,
- unsigned BBNumber,
- const AllocationOrder &Order);
- bool splitCanCauseLocalSpill(unsigned VirtRegToSplit,
- GlobalSplitCandidate &Cand, unsigned BBNumber,
- const AllocationOrder &Order);
BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate &,
- const AllocationOrder &Order,
- bool *CanCauseEvictionChain);
+ const AllocationOrder &Order);
bool calcCompactRegion(GlobalSplitCandidate &);
void splitAroundRegion(LiveRangeEdit &, ArrayRef<unsigned>);
void calcGapWeights(MCRegister, SmallVectorImpl<float> &);
- bool canEvictInterferenceInRange(const LiveInterval &VirtReg,
- MCRegister PhysReg, SlotIndex Start,
- SlotIndex End, EvictionCost &MaxCost) const;
- MCRegister getCheapestEvicteeWeight(const AllocationOrder &Order,
- const LiveInterval &VirtReg,
- SlotIndex Start, SlotIndex End,
- float *BestEvictWeight) const;
- void evictInterference(LiveInterval &, MCRegister,
+ void evictInterference(const LiveInterval &, MCRegister,
SmallVectorImpl<Register> &);
- bool mayRecolorAllInterferences(MCRegister PhysReg, LiveInterval &VirtReg,
+ bool mayRecolorAllInterferences(MCRegister PhysReg,
+ const LiveInterval &VirtReg,
SmallLISet &RecoloringCandidates,
const SmallVirtRegSet &FixedRegisters);
- MCRegister tryAssign(LiveInterval &, AllocationOrder &,
+ MCRegister tryAssign(const LiveInterval &, AllocationOrder &,
SmallVectorImpl<Register> &, const SmallVirtRegSet &);
- MCRegister tryEvict(LiveInterval &, AllocationOrder &,
+ MCRegister tryEvict(const LiveInterval &, AllocationOrder &,
SmallVectorImpl<Register> &, uint8_t,
const SmallVirtRegSet &);
- MCRegister tryRegionSplit(LiveInterval &, AllocationOrder &,
+ MCRegister tryRegionSplit(const LiveInterval &, AllocationOrder &,
SmallVectorImpl<Register> &);
/// Calculate cost of region splitting.
- unsigned calculateRegionSplitCost(LiveInterval &VirtReg,
+ unsigned calculateRegionSplitCost(const LiveInterval &VirtReg,
AllocationOrder &Order,
BlockFrequency &BestCost,
- unsigned &NumCands, bool IgnoreCSR,
- bool *CanCauseEvictionChain = nullptr);
+ unsigned &NumCands, bool IgnoreCSR);
/// Perform region splitting.
- unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
+ unsigned doRegionSplit(const LiveInterval &VirtReg, unsigned BestCand,
bool HasCompact, SmallVectorImpl<Register> &NewVRegs);
/// Check other options before using a callee-saved register for the first
/// time.
- MCRegister tryAssignCSRFirstTime(LiveInterval &VirtReg,
+ MCRegister tryAssignCSRFirstTime(const LiveInterval &VirtReg,
AllocationOrder &Order, MCRegister PhysReg,
uint8_t &CostPerUseLimit,
SmallVectorImpl<Register> &NewVRegs);
void initializeCSRCost();
- unsigned tryBlockSplit(LiveInterval &, AllocationOrder &,
+ unsigned tryBlockSplit(const LiveInterval &, AllocationOrder &,
SmallVectorImpl<Register> &);
- unsigned tryInstructionSplit(LiveInterval &, AllocationOrder &,
+ unsigned tryInstructionSplit(const LiveInterval &, AllocationOrder &,
SmallVectorImpl<Register> &);
- unsigned tryLocalSplit(LiveInterval &, AllocationOrder &,
+ unsigned tryLocalSplit(const LiveInterval &, AllocationOrder &,
SmallVectorImpl<Register> &);
- unsigned trySplit(LiveInterval &, AllocationOrder &,
+ unsigned trySplit(const LiveInterval &, AllocationOrder &,
SmallVectorImpl<Register> &, const SmallVirtRegSet &);
- unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &,
+ unsigned tryLastChanceRecoloring(const LiveInterval &, AllocationOrder &,
SmallVectorImpl<Register> &,
- SmallVirtRegSet &, unsigned);
+ SmallVirtRegSet &, RecoloringStack &,
+ unsigned);
bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<Register> &,
- SmallVirtRegSet &, unsigned);
- void tryHintRecoloring(LiveInterval &);
+ SmallVirtRegSet &, RecoloringStack &, unsigned);
+ void tryHintRecoloring(const LiveInterval &);
void tryHintsRecoloring();
/// Model the information carried by one end of a copy.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp
index 93be8f689d57..8c262130fb70 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -847,6 +847,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
while (!PBQPAllocComplete) {
LLVM_DEBUG(dbgs() << " PBQP Regalloc round " << Round << ":\n");
+ (void) Round;
PBQPRAGraph G(PBQPRAGraph::GraphMetadata(MF, LIS, MBFI));
initializeGraph(G, VRM, *VRegSpiller);
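The added (void) Round is the usual idiom for silencing an unused-variable warning: the only use of Round sits inside LLVM_DEBUG, which expands to nothing in NDEBUG builds. A minimal standalone sketch:

unsigned Round = 0;
LLVM_DEBUG(dbgs() << "round " << Round << ":\n"); // compiled out under NDEBUG
(void) Round; // marks the variable as intentionally unused in release builds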
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp
index 740890831617..32fa5e07dd16 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp
@@ -13,19 +13,19 @@
//===----------------------------------------------------------------------===//
#include "RegAllocScore.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/STLForwardCompat.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/ADT/ilist_iterator.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBundleIterator.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
-#include <cassert>
-#include <cstdint>
-#include <numeric>
-#include <vector>
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
cl::opt<double> CopyWeight("regalloc-copy-weight", cl::init(0.2), cl::Hidden);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.h
index 3c28bb61189d..2bcd0b5895bf 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.h
@@ -15,21 +15,16 @@
#ifndef LLVM_CODEGEN_REGALLOCSCORE_H_
#define LLVM_CODEGEN_REGALLOCSCORE_H_
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
-#include "llvm/Analysis/Utils/TFUtils.h"
-#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/IR/Module.h"
-#include <cassert>
-#include <cstdint>
-#include <limits>
+#include "llvm/ADT/STLFunctionalExtras.h"
namespace llvm {
+class AAResults;
+class MachineBasicBlock;
+class MachineBlockFrequencyInfo;
+class MachineFunction;
+class MachineInstr;
+
/// Regalloc score.
class RegAllocScore final {
double CopyCounts = 0.0;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoCollector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
index 5a79ac44dcf4..16afd15e29e4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
@@ -17,16 +17,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterUsageInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp
index 800d952469a5..d356962e0d78 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp
@@ -19,8 +19,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
@@ -29,7 +29,6 @@
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterBank.cpp
index 5c4d18ad79c5..512b21aeacaf 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterBank.cpp
@@ -9,7 +9,7 @@
/// This file implements the RegisterBank class.
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Config/llvm-config.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterBankInfo.cpp
index 650500c7eb31..de851ffc7fdc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterBankInfo.cpp
@@ -9,20 +9,17 @@
/// This file implements the RegisterBankInfo class.
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
-#include "llvm/ADT/SmallString.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp
index 65a65b9cae95..374fcc9a6014 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp
@@ -19,7 +19,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -44,9 +43,11 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
bool Update = false;
MF = &mf;
+ auto &STI = MF->getSubtarget();
+
// Allocate new array the first time we see a new target.
- if (MF->getSubtarget().getRegisterInfo() != TRI) {
- TRI = MF->getSubtarget().getRegisterInfo();
+ if (STI.getRegisterInfo() != TRI) {
+ TRI = STI.getRegisterInfo();
RegClass.reset(new RCInfo[TRI->getNumRegClasses()]);
Update = true;
}
@@ -68,6 +69,18 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
}
CalleeSavedRegs = CSR;
+ // Even if the CSR list is the same, we could have had a different allocation
+ // order if ignoreCSRForAllocationOrder is evaluated differently.
+ BitVector CSRHintsForAllocOrder(TRI->getNumRegs());
+ for (const MCPhysReg *I = CSR; *I; ++I)
+ for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI)
+ CSRHintsForAllocOrder[*AI] = STI.ignoreCSRForAllocationOrder(mf, *AI);
+ if (IgnoreCSRForAllocOrder.size() != CSRHintsForAllocOrder.size() ||
+ IgnoreCSRForAllocOrder != CSRHintsForAllocOrder) {
+ Update = true;
+ IgnoreCSRForAllocOrder = CSRHintsForAllocOrder;
+ }
+
RegCosts = TRI->getRegisterCosts(*MF);
// Different reserved registers?
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
index a917b0d27d4a..930d05324440 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -1647,7 +1647,7 @@ MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
for (unsigned i = CopyMI->getNumOperands(); i != 0; --i) {
MachineOperand &MO = CopyMI->getOperand(i-1);
if (MO.isReg() && MO.isUse())
- CopyMI->RemoveOperand(i-1);
+ CopyMI->removeOperand(i-1);
}
LLVM_DEBUG(dbgs() << "\tReplaced copy of <undef> value with an "
"implicit def\n");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp
index 424ad7419165..289d31be2d2d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -37,11 +37,9 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
#include <cassert>
#include <iterator>
#include <limits>
-#include <string>
#include <utility>
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterUsageInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterUsageInfo.cpp
index 6858d7233bc5..9d9cdf9edbb3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterUsageInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterUsageInfo.cpp
@@ -22,8 +22,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
-#include <algorithm>
-#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp
index 49859aeec78b..01886e40a4a3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp
@@ -12,13 +12,12 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
/// \file RemoveRedundantDebugValues.cpp
///
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
index 0872ec303460..466022ae0ac1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -33,9 +33,9 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
index 0ff045fa787e..87b8ac59bdba 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
@@ -1,4 +1,4 @@
-//=== ReplaceWithVeclib.cpp - Replace vector instrinsics with veclib calls ===//
+//=== ReplaceWithVeclib.cpp - Replace vector intrinsics with veclib calls -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -23,7 +23,6 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
@@ -110,7 +109,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
auto *ArgType = Arg.value()->getType();
// Vector calls to intrinsics can still have
// scalar operands for specific arguments.
- if (hasVectorInstrinsicScalarOpd(IntrinsicID, Arg.index())) {
+ if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, Arg.index())) {
ScalarTypes.push_back(ArgType);
} else {
// The argument in this place should be a vector if
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp
index 3d8a7eecce18..e7116ec3ea28 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp
@@ -17,7 +17,6 @@
#include "SafeStackLayout.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -49,10 +48,10 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
-#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -97,31 +96,12 @@ static cl::opt<bool>
SafeStackUsePointerAddress("safestack-use-pointer-address",
cl::init(false), cl::Hidden);
-// Disabled by default due to PR32143.
static cl::opt<bool> ClColoring("safe-stack-coloring",
cl::desc("enable safe stack coloring"),
- cl::Hidden, cl::init(false));
+ cl::Hidden, cl::init(true));
namespace {
-/// Rewrite an SCEV expression for a memory access address to an expression that
-/// represents offset from the given alloca.
-///
-/// The implementation simply replaces all mentions of the alloca with zero.
-class AllocaOffsetRewriter : public SCEVRewriteVisitor<AllocaOffsetRewriter> {
- const Value *AllocaPtr;
-
-public:
- AllocaOffsetRewriter(ScalarEvolution &SE, const Value *AllocaPtr)
- : SCEVRewriteVisitor(SE), AllocaPtr(AllocaPtr) {}
-
- const SCEV *visitUnknown(const SCEVUnknown *Expr) {
- if (Expr->getValue() == AllocaPtr)
- return SE.getZero(Expr->getType());
- return Expr;
- }
-};
-
/// The SafeStack pass splits the stack of each function into the safe
/// stack, which is only accessed through memory safe dereferences (as
/// determined statically), and the unsafe stack, which contains all
@@ -147,7 +127,7 @@ class SafeStack {
///
/// 16 seems like a reasonable upper bound on the alignment of objects that we
/// might expect to appear on the stack on most common targets.
- static constexpr uint64_t StackAlignment = 16;
+ static constexpr Align StackAlignment = Align::Constant<16>();
/// Return the value of the stack canary.
Value *getStackGuard(IRBuilder<> &IRB, Function &F);
@@ -221,7 +201,7 @@ public:
bool run();
};
-constexpr uint64_t SafeStack::StackAlignment;
+constexpr Align SafeStack::StackAlignment;
uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) {
uint64_t Size = DL.getTypeAllocSize(AI->getAllocatedType());
@@ -236,9 +216,18 @@ uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) {
bool SafeStack::IsAccessSafe(Value *Addr, uint64_t AccessSize,
const Value *AllocaPtr, uint64_t AllocaSize) {
- AllocaOffsetRewriter Rewriter(SE, AllocaPtr);
- const SCEV *Expr = Rewriter.visit(SE.getSCEV(Addr));
+ const SCEV *AddrExpr = SE.getSCEV(Addr);
+ const auto *Base = dyn_cast<SCEVUnknown>(SE.getPointerBase(AddrExpr));
+ if (!Base || Base->getValue() != AllocaPtr) {
+ LLVM_DEBUG(
+ dbgs() << "[SafeStack] "
+ << (isa<AllocaInst>(AllocaPtr) ? "Alloca " : "ByValArgument ")
+ << *AllocaPtr << "\n"
+ << "SCEV " << *AddrExpr << " not directly based on alloca\n");
+ return false;
+ }
+ const SCEV *Expr = SE.removePointerBase(AddrExpr);
uint64_t BitWidth = SE.getTypeSizeInBits(Expr->getType());
ConstantRange AccessStartRange = SE.getUnsignedRange(Expr);
ConstantRange SizeRange =
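The rewritten check leans on ScalarEvolution's pointer-base decomposition; schematically, for an access into the alloca under test (a sketch, with %buf standing in for AllocaPtr):

// Addr = getelementptr i8, i8* %buf, i64 %i
// SE.getSCEV(Addr)            ==> (%buf + %i), a pointer-typed SCEV
// SE.getPointerBase(AddrExpr) ==> %buf as a SCEVUnknown; if this is not
//                                 exactly AllocaPtr, the access is rejected
// SE.removePointerBase(...)   ==> %i, the integer offset whose unsigned range
//                                 is then checked against the alloca's bounds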
@@ -645,6 +634,13 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
// FIXME: no need to update BasePointer in leaf functions.
unsigned FrameSize = alignTo(SSL.getFrameSize(), StackAlignment);
+ MDBuilder MDB(F.getContext());
+ SmallVector<Metadata *, 2> Data;
+ Data.push_back(MDB.createString("unsafe-stack-size"));
+ Data.push_back(MDB.createConstant(ConstantInt::get(Int32Ty, FrameSize)));
+ MDNode *MD = MDTuple::get(F.getContext(), Data);
+ F.setMetadata(LLVMContext::MD_annotation, MD);
+
// Update shadow stack pointer in the function epilogue.
IRB.SetInsertPoint(BasePointer->getNextNode());
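The MDBuilder code added above attaches the frame size to the function as annotation metadata; the result looks roughly like the following IR, with the function name and the 48-byte size invented for illustration:

define void @foo() !annotation !0 {
  ...
}
!0 = !{!"unsafe-stack-size", i32 48}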
@@ -677,13 +673,12 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
SP = IRB.CreateSub(SP, Size);
// Align the SP value to satisfy the AllocaInst, type and stack alignments.
- uint64_t Align =
- std::max(std::max(DL.getPrefTypeAlignment(Ty), AI->getAlignment()),
- StackAlignment);
+ auto Align = std::max(std::max(DL.getPrefTypeAlign(Ty), AI->getAlign()),
+ StackAlignment);
- assert(isPowerOf2_32(Align));
Value *NewTop = IRB.CreateIntToPtr(
- IRB.CreateAnd(SP, ConstantInt::get(IntPtrTy, ~uint64_t(Align - 1))),
+ IRB.CreateAnd(SP,
+ ConstantInt::get(IntPtrTy, ~uint64_t(Align.value() - 1))),
StackPtrTy);
// Save the stack pointer.
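The AND with ~(Align - 1) in this hunk rounds the unsafe stack pointer down to the required alignment; a worked instance with illustrative numbers:

// Align.value() == 16  =>  ~uint64_t(16 - 1) == 0xFFFF...FFF0
// SP == 0x7ffe1234     =>  SP & 0xFFFF...FFF0 == 0x7ffe1230
// i.e. the greatest 16-byte-aligned address <= SP, which is the correct
// direction for a downward-growing stack.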
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp
index 602afcfa9001..f821145f4b63 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp
@@ -11,7 +11,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h
index 4ac7af2059f5..6126c7a67854 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h
@@ -52,7 +52,7 @@ class StackLayout {
void layoutObject(StackObject &Obj);
public:
- StackLayout(uint64_t StackAlignment) : MaxAlignment(StackAlignment) {}
+ StackLayout(Align StackAlignment) : MaxAlignment(StackAlignment) {}
/// Add an object to the stack frame. Value pointer is opaque and used as a
/// handle to retrieve the object's offset in the frame later.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 0e8e8338b46d..07dcc34fbf15 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -14,7 +14,6 @@
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/ADT/IntEqClasses.h"
#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/ADT/iterator_range.h"
@@ -40,9 +39,6 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/LaneBitmask.h"
@@ -65,9 +61,9 @@ using namespace llvm;
#define DEBUG_TYPE "machine-scheduler"
-static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
- cl::ZeroOrMore, cl::init(false),
- cl::desc("Enable use of AA during MI DAG construction"));
+static cl::opt<bool>
+ EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
+ cl::desc("Enable use of AA during MI DAG construction"));
static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
index 05b2a3764cca..e7b14944acfe 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -10,13 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp
new file mode 100644
index 000000000000..c199b6a6cca8
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp
@@ -0,0 +1,989 @@
+//===--- SelectOptimize.cpp - Convert select to branches if profitable ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass converts selects to conditional jumps when profitable.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/ScaledNumber.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
+#include <algorithm>
+#include <memory>
+#include <queue>
+#include <stack>
+#include <string>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "select-optimize"
+
+STATISTIC(NumSelectOptAnalyzed,
+ "Number of select groups considered for conversion to branch");
+STATISTIC(NumSelectConvertedExpColdOperand,
+ "Number of select groups converted due to expensive cold operand");
+STATISTIC(NumSelectConvertedHighPred,
+ "Number of select groups converted due to high-predictability");
+STATISTIC(NumSelectUnPred,
+ "Number of select groups not converted due to unpredictability");
+STATISTIC(NumSelectColdBB,
+ "Number of select groups not converted due to cold basic block");
+STATISTIC(NumSelectConvertedLoop,
+ "Number of select groups converted due to loop-level analysis");
+STATISTIC(NumSelectsConverted, "Number of selects converted");
+
+static cl::opt<unsigned> ColdOperandThreshold(
+ "cold-operand-threshold",
+ cl::desc("Maximum frequency of path for an operand to be considered cold."),
+ cl::init(20), cl::Hidden);
+
+static cl::opt<unsigned> ColdOperandMaxCostMultiplier(
+ "cold-operand-max-cost-multiplier",
+ cl::desc("Maximum cost multiplier of TCC_expensive for the dependence "
+ "slice of a cold operand to be considered inexpensive."),
+ cl::init(1), cl::Hidden);
+
+static cl::opt<unsigned>
+ GainGradientThreshold("select-opti-loop-gradient-gain-threshold",
+ cl::desc("Gradient gain threshold (%)."),
+ cl::init(25), cl::Hidden);
+
+static cl::opt<unsigned>
+ GainCycleThreshold("select-opti-loop-cycle-gain-threshold",
+ cl::desc("Minimum gain per loop (in cycles) threshold."),
+ cl::init(4), cl::Hidden);
+
+static cl::opt<unsigned> GainRelativeThreshold(
+ "select-opti-loop-relative-gain-threshold",
+ cl::desc(
+ "Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"),
+ cl::init(8), cl::Hidden);
+
+static cl::opt<unsigned> MispredictDefaultRate(
+ "mispredict-default-rate", cl::Hidden, cl::init(25),
+ cl::desc("Default mispredict rate (initialized to 25%)."));
+
+static cl::opt<bool>
+ DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden,
+ cl::init(false),
+ cl::desc("Disable loop-level heuristics."));
+
+namespace {
+
+class SelectOptimize : public FunctionPass {
+ const TargetMachine *TM = nullptr;
+ const TargetSubtargetInfo *TSI;
+ const TargetLowering *TLI = nullptr;
+ const TargetTransformInfo *TTI = nullptr;
+ const LoopInfo *LI;
+ DominatorTree *DT;
+ std::unique_ptr<BlockFrequencyInfo> BFI;
+ std::unique_ptr<BranchProbabilityInfo> BPI;
+ ProfileSummaryInfo *PSI;
+ OptimizationRemarkEmitter *ORE;
+ TargetSchedModel TSchedModel;
+
+public:
+ static char ID;
+
+ SelectOptimize() : FunctionPass(ID) {
+ initializeSelectOptimizePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ AU.addRequired<TargetPassConfig>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+ }
+
+private:
+ // Select groups consist of consecutive select instructions with the same
+ // condition.
+ using SelectGroup = SmallVector<SelectInst *, 2>;
+ using SelectGroups = SmallVector<SelectGroup, 2>;
+
+ using Scaled64 = ScaledNumber<uint64_t>;
+
+ struct CostInfo {
+ /// Predicated cost (with selects as conditional moves).
+ Scaled64 PredCost;
+ /// Non-predicated cost (with selects converted to branches).
+ Scaled64 NonPredCost;
+ };
+
+ // Converts select instructions of a function to conditional jumps when deemed
+ // profitable. Returns true if at least one select was converted.
+ bool optimizeSelects(Function &F);
+
+ // Heuristics for determining which select instructions can be profitably
+  // converted to branches. Separate heuristics for selects in inner-most loops
+ // and the rest of code regions (base heuristics for non-inner-most loop
+ // regions).
+ void optimizeSelectsBase(Function &F, SelectGroups &ProfSIGroups);
+ void optimizeSelectsInnerLoops(Function &F, SelectGroups &ProfSIGroups);
+
+  // Converts the select groups that were deemed profitable-to-convert into
+  // branches.
+ void convertProfitableSIGroups(SelectGroups &ProfSIGroups);
+
+ // Splits selects of a given basic block into select groups.
+ void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups);
+
+  // Determines which select groups are profitable to convert to branches
+ // (base and inner-most-loop heuristics).
+ void findProfitableSIGroupsBase(SelectGroups &SIGroups,
+ SelectGroups &ProfSIGroups);
+ void findProfitableSIGroupsInnerLoops(const Loop *L, SelectGroups &SIGroups,
+ SelectGroups &ProfSIGroups);
+
+ // Determines if a select group should be converted to a branch (base
+ // heuristics).
+ bool isConvertToBranchProfitableBase(const SmallVector<SelectInst *, 2> &ASI);
+
+ // Returns true if there are expensive instructions in the cold value
+ // operand's (if any) dependence slice of any of the selects of the given
+ // group.
+ bool hasExpensiveColdOperand(const SmallVector<SelectInst *, 2> &ASI);
+
+ // For a given source instruction, collect its backwards dependence slice
+ // consisting of instructions exclusively computed for producing the operands
+ // of the source instruction.
+ void getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice,
+ bool ForSinking = false);
+
+ // Returns true if the condition of the select is highly predictable.
+ bool isSelectHighlyPredictable(const SelectInst *SI);
+
+ // Loop-level checks to determine if a non-predicated version (with branches)
+ // of the given loop is more profitable than its predicated version.
+  bool checkLoopHeuristics(const Loop *L, const CostInfo LoopCost[2]);
+
+ // Computes instruction and loop-critical-path costs for both the predicated
+ // and non-predicated version of the given loop.
+ bool computeLoopCosts(const Loop *L, const SelectGroups &SIGroups,
+ DenseMap<const Instruction *, CostInfo> &InstCostMap,
+ CostInfo *LoopCost);
+
+ // Returns a set of all the select instructions in the given select groups.
+ SmallPtrSet<const Instruction *, 2> getSIset(const SelectGroups &SIGroups);
+
+ // Returns the latency cost of a given instruction.
+ Optional<uint64_t> computeInstCost(const Instruction *I);
+
+ // Returns the misprediction cost of a given select when converted to branch.
+ Scaled64 getMispredictionCost(const SelectInst *SI, const Scaled64 CondCost);
+
+ // Returns the cost of a branch when the prediction is correct.
+ Scaled64 getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
+ const SelectInst *SI);
+
+ // Returns true if the target architecture supports lowering a given select.
+ bool isSelectKindSupported(SelectInst *SI);
+};
+} // namespace
+
+char SelectOptimize::ID = 0;
+
+INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
+INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
+ false)
+
+FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); }
+
+bool SelectOptimize::runOnFunction(Function &F) {
+ TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
+ TSI = TM->getSubtargetImpl(F);
+ TLI = TSI->getTargetLowering();
+
+ // If none of the select types is supported then skip this pass.
+ // This is an optimization pass. Legality issues will be handled by
+ // instruction selection.
+ if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
+ !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) &&
+ !TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
+ return false;
+
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ BPI.reset(new BranchProbabilityInfo(F, *LI));
+ BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
+ PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+ TSchedModel.init(TSI);
+
+ // When optimizing for size, selects are preferable over branches.
+ if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI.get()))
+ return false;
+
+ return optimizeSelects(F);
+}
+
+bool SelectOptimize::optimizeSelects(Function &F) {
+  // Determine which select groups are profitable to convert to branches.
+ SelectGroups ProfSIGroups;
+ // Base heuristics apply only to non-loops and outer loops.
+ optimizeSelectsBase(F, ProfSIGroups);
+ // Separate heuristics for inner-most loops.
+ optimizeSelectsInnerLoops(F, ProfSIGroups);
+
+  // Convert the select groups that were deemed profitable-to-convert into
+  // branches.
+ convertProfitableSIGroups(ProfSIGroups);
+
+ // Code modified if at least one select group was converted.
+ return !ProfSIGroups.empty();
+}
+
+void SelectOptimize::optimizeSelectsBase(Function &F,
+ SelectGroups &ProfSIGroups) {
+ // Collect all the select groups.
+ SelectGroups SIGroups;
+ for (BasicBlock &BB : F) {
+ // Base heuristics apply only to non-loops and outer loops.
+ Loop *L = LI->getLoopFor(&BB);
+ if (L && L->isInnermost())
+ continue;
+ collectSelectGroups(BB, SIGroups);
+ }
+
+  // Determine which select groups are profitable to convert to branches.
+ findProfitableSIGroupsBase(SIGroups, ProfSIGroups);
+}
+
+void SelectOptimize::optimizeSelectsInnerLoops(Function &F,
+ SelectGroups &ProfSIGroups) {
+ SmallVector<Loop *, 4> Loops(LI->begin(), LI->end());
+ // Need to check size on each iteration as we accumulate child loops.
+ for (unsigned long i = 0; i < Loops.size(); ++i)
+ for (Loop *ChildL : Loops[i]->getSubLoops())
+ Loops.push_back(ChildL);
+
+ for (Loop *L : Loops) {
+ if (!L->isInnermost())
+ continue;
+
+ SelectGroups SIGroups;
+ for (BasicBlock *BB : L->getBlocks())
+ collectSelectGroups(*BB, SIGroups);
+
+ findProfitableSIGroupsInnerLoops(L, SIGroups, ProfSIGroups);
+ }
+}
+
+/// If \p isTrue is true, return the true value of \p SI, otherwise return
+/// the false value of \p SI. If the true/false value of \p SI is defined by any
+/// select instructions in \p Selects, look through the defining select
+/// instruction until the true/false value is not defined in \p Selects.
+static Value *
+getTrueOrFalseValue(SelectInst *SI, bool isTrue,
+ const SmallPtrSet<const Instruction *, 2> &Selects) {
+ Value *V = nullptr;
+ for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
+ DefSI = dyn_cast<SelectInst>(V)) {
+ assert(DefSI->getCondition() == SI->getCondition() &&
+ "The condition of DefSI does not match with SI");
+ V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
+ }
+ assert(V && "Failed to get select true/false value");
+ return V;
+}
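+// Hedged example for getTrueOrFalseValue above: given the group
+//   %s1 = select i1 %c, i32 %a, i32 %b
+//   %s2 = select i1 %c, i32 %s1, i32 %d
+// the true value of %s2 looks through %s1 and resolves to %a.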
+
+void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
+ for (SelectGroup &ASI : ProfSIGroups) {
+ // The code transformation here is a modified version of the sinking
+ // transformation in CodeGenPrepare::optimizeSelectInst with a more
+ // aggressive strategy of which instructions to sink.
+ //
+ // TODO: eliminate the redundancy of logic transforming selects to branches
+ // by removing CodeGenPrepare::optimizeSelectInst and optimizing here
+ // selects for all cases (with and without profile information).
+
+ // Transform a sequence like this:
+ // start:
+ // %cmp = cmp uge i32 %a, %b
+ // %sel = select i1 %cmp, i32 %c, i32 %d
+ //
+ // Into:
+ // start:
+ // %cmp = cmp uge i32 %a, %b
+ // %cmp.frozen = freeze %cmp
+ // br i1 %cmp.frozen, label %select.true, label %select.false
+ // select.true:
+ // br label %select.end
+ // select.false:
+ // br label %select.end
+ // select.end:
+ // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
+ //
+ // %cmp should be frozen, otherwise it may introduce undefined behavior.
+ // In addition, we may sink instructions that produce %c or %d into the
+ // destination(s) of the new branch.
+ // If the true or false blocks do not contain a sunken instruction, that
+ // block and its branch may be optimized away. In that case, one side of the
+ // first branch will point directly to select.end, and the corresponding PHI
+ // predecessor block will be the start block.
+
+ // Find all the instructions that can be soundly sunk to the true/false
+ // blocks. These are instructions that are computed solely for producing the
+ // operands of the select instructions in the group and can be sunk without
+ // breaking the semantics of the LLVM IR (e.g., cannot sink instructions
+ // with side effects).
+ SmallVector<std::stack<Instruction *>, 2> TrueSlices, FalseSlices;
+ typedef std::stack<Instruction *>::size_type StackSizeType;
+ StackSizeType maxTrueSliceLen = 0, maxFalseSliceLen = 0;
+ for (SelectInst *SI : ASI) {
+ // For each select, compute the sinkable dependence chains of the true and
+ // false operands.
+ if (auto *TI = dyn_cast<Instruction>(SI->getTrueValue())) {
+ std::stack<Instruction *> TrueSlice;
+ getExclBackwardsSlice(TI, TrueSlice, true);
+ maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size());
+ TrueSlices.push_back(TrueSlice);
+ }
+ if (auto *FI = dyn_cast<Instruction>(SI->getFalseValue())) {
+ std::stack<Instruction *> FalseSlice;
+ getExclBackwardsSlice(FI, FalseSlice, true);
+ maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size());
+ FalseSlices.push_back(FalseSlice);
+ }
+ }
+ // In the case of multiple select instructions in the same group, the order
+ // of non-dependent instructions (instructions of different dependence
+ // slices) in the true/false blocks appears to affect performance.
+    // Interleaving the slices experimentally appears to be the optimal approach.
+ // This interleaving scheduling allows for more ILP (with a natural downside
+    // of slightly increasing register pressure) compared to a simple ordering of
+ // one whole chain after another. One would expect that this ordering would
+ // not matter since the scheduling in the backend of the compiler would
+ // take care of it, but apparently the scheduler fails to deliver optimal
+ // ILP with a naive ordering here.
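+    // Illustration (slices named abstractly): with sinkable slices A = [a1, a2]
+    // and B = [b1, b2] in emission order, the loops below emit a1, b1, a2, b2
+    // (round-robin across slices) rather than a1, a2, b1, b2, letting a1 and
+    // b1 issue in parallel.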
+ SmallVector<Instruction *, 2> TrueSlicesInterleaved, FalseSlicesInterleaved;
+ for (StackSizeType IS = 0; IS < maxTrueSliceLen; ++IS) {
+ for (auto &S : TrueSlices) {
+ if (!S.empty()) {
+ TrueSlicesInterleaved.push_back(S.top());
+ S.pop();
+ }
+ }
+ }
+ for (StackSizeType IS = 0; IS < maxFalseSliceLen; ++IS) {
+ for (auto &S : FalseSlices) {
+ if (!S.empty()) {
+ FalseSlicesInterleaved.push_back(S.top());
+ S.pop();
+ }
+ }
+ }
+
+ // We split the block containing the select(s) into two blocks.
+ SelectInst *SI = ASI.front();
+ SelectInst *LastSI = ASI.back();
+ BasicBlock *StartBlock = SI->getParent();
+ BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
+ BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
+ BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency());
+ // Delete the unconditional branch that was just created by the split.
+ StartBlock->getTerminator()->eraseFromParent();
+
+ // Move any debug/pseudo instructions that were in-between the select
+ // group to the newly-created end block.
+ SmallVector<Instruction *, 2> DebugPseudoINS;
+ auto DIt = SI->getIterator();
+ while (&*DIt != LastSI) {
+ if (DIt->isDebugOrPseudoInst())
+ DebugPseudoINS.push_back(&*DIt);
+ DIt++;
+ }
+ for (auto DI : DebugPseudoINS) {
+ DI->moveBefore(&*EndBlock->getFirstInsertionPt());
+ }
+
+ // These are the new basic blocks for the conditional branch.
+ // At least one will become an actual new basic block.
+ BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr;
+ BranchInst *TrueBranch = nullptr, *FalseBranch = nullptr;
+ if (!TrueSlicesInterleaved.empty()) {
+ TrueBlock = BasicBlock::Create(LastSI->getContext(), "select.true.sink",
+ EndBlock->getParent(), EndBlock);
+ TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
+ TrueBranch->setDebugLoc(LastSI->getDebugLoc());
+ for (Instruction *TrueInst : TrueSlicesInterleaved)
+ TrueInst->moveBefore(TrueBranch);
+ }
+ if (!FalseSlicesInterleaved.empty()) {
+ FalseBlock = BasicBlock::Create(LastSI->getContext(), "select.false.sink",
+ EndBlock->getParent(), EndBlock);
+ FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
+ FalseBranch->setDebugLoc(LastSI->getDebugLoc());
+ for (Instruction *FalseInst : FalseSlicesInterleaved)
+ FalseInst->moveBefore(FalseBranch);
+ }
+ // If there was nothing to sink, then arbitrarily choose the 'false' side
+ // for a new input value to the PHI.
+ if (TrueBlock == FalseBlock) {
+ assert(TrueBlock == nullptr &&
+ "Unexpected basic block transform while optimizing select");
+
+ FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
+ EndBlock->getParent(), EndBlock);
+ auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
+ FalseBranch->setDebugLoc(SI->getDebugLoc());
+ }
+
+ // Insert the real conditional branch based on the original condition.
+ // If we did not create a new block for one of the 'true' or 'false' paths
+ // of the condition, it means that side of the branch goes to the end block
+ // directly and the path originates from the start block from the point of
+ // view of the new PHI.
+ BasicBlock *TT, *FT;
+ if (TrueBlock == nullptr) {
+ TT = EndBlock;
+ FT = FalseBlock;
+ TrueBlock = StartBlock;
+ } else if (FalseBlock == nullptr) {
+ TT = TrueBlock;
+ FT = EndBlock;
+ FalseBlock = StartBlock;
+ } else {
+ TT = TrueBlock;
+ FT = FalseBlock;
+ }
+ IRBuilder<> IB(SI);
+ auto *CondFr =
+ IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
+ IB.CreateCondBr(CondFr, TT, FT, SI);
+
+ SmallPtrSet<const Instruction *, 2> INS;
+ INS.insert(ASI.begin(), ASI.end());
+    // Use a reverse iterator because a later select may use the value of an
+    // earlier select, and we need to propagate the value through the earlier
+    // select to get the PHI operand.
+ for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) {
+ SelectInst *SI = *It;
+ // The select itself is replaced with a PHI Node.
+ PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
+ PN->takeName(SI);
+ PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
+ PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
+ PN->setDebugLoc(SI->getDebugLoc());
+
+ SI->replaceAllUsesWith(PN);
+ SI->eraseFromParent();
+ INS.erase(SI);
+ ++NumSelectsConverted;
+ }
+ }
+}
+
+void SelectOptimize::collectSelectGroups(BasicBlock &BB,
+ SelectGroups &SIGroups) {
+ BasicBlock::iterator BBIt = BB.begin();
+ while (BBIt != BB.end()) {
+ Instruction *I = &*BBIt++;
+ if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+ SelectGroup SIGroup;
+ SIGroup.push_back(SI);
+ while (BBIt != BB.end()) {
+ Instruction *NI = &*BBIt;
+ SelectInst *NSI = dyn_cast<SelectInst>(NI);
+ if (NSI && SI->getCondition() == NSI->getCondition()) {
+ SIGroup.push_back(NSI);
+ } else if (!NI->isDebugOrPseudoInst()) {
+ // Debug/pseudo instructions should be skipped and not prevent the
+ // formation of a select group.
+ break;
+ }
+ ++BBIt;
+ }
+
+      // If the select type is not supported, there is no point in optimizing it.
+ // Instruction selection will take care of it.
+ if (!isSelectKindSupported(SI))
+ continue;
+
+ SIGroups.push_back(SIGroup);
+ }
+ }
+}
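+// Hedged example of the grouping above: in a block containing
+//   %x = select i1 %c, i32 %a, i32 %b
+//   %y = select i1 %c, i32 %p, i32 %q
+//   %z = select i1 %d, i32 %u, i32 %v
+// %x and %y share condition %c and form one group, while %z (a different
+// condition) starts a group of its own.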
+
+void SelectOptimize::findProfitableSIGroupsBase(SelectGroups &SIGroups,
+ SelectGroups &ProfSIGroups) {
+ for (SelectGroup &ASI : SIGroups) {
+ ++NumSelectOptAnalyzed;
+ if (isConvertToBranchProfitableBase(ASI))
+ ProfSIGroups.push_back(ASI);
+ }
+}
+
+void SelectOptimize::findProfitableSIGroupsInnerLoops(
+ const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
+ NumSelectOptAnalyzed += SIGroups.size();
+ // For each select group in an inner-most loop,
+  // a branch is preferable to a select/conditional-move if:
+ // i) conversion to branches for all the select groups of the loop satisfies
+ // loop-level heuristics including reducing the loop's critical path by
+ // some threshold (see SelectOptimize::checkLoopHeuristics); and
+ // ii) the total cost of the select group is cheaper with a branch compared
+ // to its predicated version. The cost is in terms of latency and the cost
+ // of a select group is the cost of its most expensive select instruction
+ // (assuming infinite resources and thus fully leveraging available ILP).
+
+ DenseMap<const Instruction *, CostInfo> InstCostMap;
+ CostInfo LoopCost[2] = {{Scaled64::getZero(), Scaled64::getZero()},
+ {Scaled64::getZero(), Scaled64::getZero()}};
+ if (!computeLoopCosts(L, SIGroups, InstCostMap, LoopCost) ||
+ !checkLoopHeuristics(L, LoopCost)) {
+ return;
+ }
+
+ for (SelectGroup &ASI : SIGroups) {
+ // Assuming infinite resources, the cost of a group of instructions is the
+ // cost of the most expensive instruction of the group.
+ Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero();
+ for (SelectInst *SI : ASI) {
+ SelectCost = std::max(SelectCost, InstCostMap[SI].PredCost);
+ BranchCost = std::max(BranchCost, InstCostMap[SI].NonPredCost);
+ }
+ if (BranchCost < SelectCost) {
+ OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", ASI.front());
+ OR << "Profitable to convert to branch (loop analysis). BranchCost="
+ << BranchCost.toString() << ", SelectCost=" << SelectCost.toString()
+ << ". ";
+ ORE->emit(OR);
+ ++NumSelectConvertedLoop;
+ ProfSIGroups.push_back(ASI);
+ } else {
+ OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front());
+ ORmiss << "Select is more profitable (loop analysis). BranchCost="
+ << BranchCost.toString()
+ << ", SelectCost=" << SelectCost.toString() << ". ";
+ ORE->emit(ORmiss);
+ }
+ }
+}
+
+bool SelectOptimize::isConvertToBranchProfitableBase(
+ const SmallVector<SelectInst *, 2> &ASI) {
+ SelectInst *SI = ASI.front();
+ OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI);
+ OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI);
+
+ // Skip cold basic blocks. Better to optimize for size for cold blocks.
+ if (PSI->isColdBlock(SI->getParent(), BFI.get())) {
+ ++NumSelectColdBB;
+ ORmiss << "Not converted to branch because of cold basic block. ";
+ ORE->emit(ORmiss);
+ return false;
+ }
+
+ // If unpredictable, branch form is less profitable.
+ if (SI->getMetadata(LLVMContext::MD_unpredictable)) {
+ ++NumSelectUnPred;
+ ORmiss << "Not converted to branch because of unpredictable branch. ";
+ ORE->emit(ORmiss);
+ return false;
+ }
+
+ // If highly predictable, branch form is more profitable, unless a
+ // predictable select is inexpensive in the target architecture.
+ if (isSelectHighlyPredictable(SI) && TLI->isPredictableSelectExpensive()) {
+ ++NumSelectConvertedHighPred;
+ OR << "Converted to branch because of highly predictable branch. ";
+ ORE->emit(OR);
+ return true;
+ }
+
+ // Look for expensive instructions in the cold operand's (if any) dependence
+ // slice of any of the selects in the group.
+ if (hasExpensiveColdOperand(ASI)) {
+ ++NumSelectConvertedExpColdOperand;
+ OR << "Converted to branch because of expensive cold operand.";
+ ORE->emit(OR);
+ return true;
+ }
+
+ ORmiss << "Not profitable to convert to branch (base heuristic).";
+ ORE->emit(ORmiss);
+ return false;
+}
+
+static InstructionCost divideNearest(InstructionCost Numerator,
+ uint64_t Denominator) {
+ return (Numerator + (Denominator / 2)) / Denominator;
+}
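+// Worked example for divideNearest above (values arbitrary):
+// divideNearest(7, 4) computes (7 + 2) / 4 == 2, i.e. 1.75 rounds to 2,
+// whereas plain integer division would truncate to 1.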
+
+bool SelectOptimize::hasExpensiveColdOperand(
+ const SmallVector<SelectInst *, 2> &ASI) {
+ bool ColdOperand = false;
+ uint64_t TrueWeight, FalseWeight, TotalWeight;
+ if (ASI.front()->extractProfMetadata(TrueWeight, FalseWeight)) {
+ uint64_t MinWeight = std::min(TrueWeight, FalseWeight);
+ TotalWeight = TrueWeight + FalseWeight;
+    // Is there a path with frequency < ColdOperandThreshold% (default: 20%)?
+ ColdOperand = TotalWeight * ColdOperandThreshold > 100 * MinWeight;
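+    // Worked example (hypothetical weights): TrueWeight=95, FalseWeight=5
+    // gives MinWeight=5, TotalWeight=100, and 100*20 > 100*5 holds, so the
+    // false path (5% < 20% frequency) is considered cold.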
+ } else if (PSI->hasProfileSummary()) {
+ OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front());
+ ORmiss << "Profile data available but missing branch-weights metadata for "
+ "select instruction. ";
+ ORE->emit(ORmiss);
+ }
+ if (!ColdOperand)
+ return false;
+ // Check if the cold path's dependence slice is expensive for any of the
+ // selects of the group.
+ for (SelectInst *SI : ASI) {
+ Instruction *ColdI = nullptr;
+ uint64_t HotWeight;
+ if (TrueWeight < FalseWeight) {
+ ColdI = dyn_cast<Instruction>(SI->getTrueValue());
+ HotWeight = FalseWeight;
+ } else {
+ ColdI = dyn_cast<Instruction>(SI->getFalseValue());
+ HotWeight = TrueWeight;
+ }
+ if (ColdI) {
+ std::stack<Instruction *> ColdSlice;
+ getExclBackwardsSlice(ColdI, ColdSlice);
+ InstructionCost SliceCost = 0;
+ while (!ColdSlice.empty()) {
+ SliceCost += TTI->getInstructionCost(ColdSlice.top(),
+ TargetTransformInfo::TCK_Latency);
+ ColdSlice.pop();
+ }
+      // The colder the cold value operand is, the more expensive the cmov
+      // becomes, since it must compute that operand every time. Thus, the
+      // colder the operand, the more heavily its cost is weighted.
+ // Get nearest integer cost adjusted for coldness.
+ InstructionCost AdjSliceCost =
+ divideNearest(SliceCost * HotWeight, TotalWeight);
+ if (AdjSliceCost >=
+ ColdOperandMaxCostMultiplier * TargetTransformInfo::TCC_Expensive)
+ return true;
+ }
+ }
+ return false;
+}
+
+// For a given source instruction, collect its backwards dependence slice
+// consisting of instructions exclusively computed for the purpose of producing
+// the operands of the source instruction. As an approximation
+// (sufficiently-accurate in practice), we populate this set with the
+// instructions of the backwards dependence slice that have only one use and
+// form a one-use chain that leads to the source instruction.
+void SelectOptimize::getExclBackwardsSlice(Instruction *I,
+ std::stack<Instruction *> &Slice,
+ bool ForSinking) {
+ SmallPtrSet<Instruction *, 2> Visited;
+ std::queue<Instruction *> Worklist;
+ Worklist.push(I);
+ while (!Worklist.empty()) {
+ Instruction *II = Worklist.front();
+ Worklist.pop();
+
+ // Avoid cycles.
+ if (!Visited.insert(II).second)
+ continue;
+
+ if (!II->hasOneUse())
+ continue;
+
+ // Cannot soundly sink instructions with side-effects.
+ // Terminator or phi instructions cannot be sunk.
+    // Avoid sinking other select instructions (should be handled separately).
+ if (ForSinking && (II->isTerminator() || II->mayHaveSideEffects() ||
+ isa<SelectInst>(II) || isa<PHINode>(II)))
+ continue;
+
+    // Avoid considering instructions with lower frequency than the source
+ // instruction (i.e., avoid colder code regions of the dependence slice).
+ if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent()))
+ continue;
+
+ // Eligible one-use instruction added to the dependence slice.
+ Slice.push(II);
+
+ // Explore all the operands of the current instruction to expand the slice.
+ for (unsigned k = 0; k < II->getNumOperands(); ++k)
+ if (auto *OpI = dyn_cast<Instruction>(II->getOperand(k)))
+ Worklist.push(OpI);
+ }
+}
+
+bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) {
+ uint64_t TrueWeight, FalseWeight;
+ if (SI->extractProfMetadata(TrueWeight, FalseWeight)) {
+ uint64_t Max = std::max(TrueWeight, FalseWeight);
+ uint64_t Sum = TrueWeight + FalseWeight;
+ if (Sum != 0) {
+ auto Probability = BranchProbability::getBranchProbability(Max, Sum);
+ if (Probability > TTI->getPredictableBranchThreshold())
+ return true;
+ }
+ }
+ return false;
+}
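+// Hedged example for isSelectHighlyPredictable above: branch weights of
+// {1000, 1} yield Probability = 1000/1001 ~= 0.999, above the 99/100 default
+// of the generic getPredictableBranchThreshold(), so such a select counts as
+// highly predictable (the threshold itself is target-dependent).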
+
+bool SelectOptimize::checkLoopHeuristics(const Loop *L,
+ const CostInfo LoopCost[2]) {
+ // Loop-level checks to determine if a non-predicated version (with branches)
+ // of the loop is more profitable than its predicated version.
+
+ if (DisableLoopLevelHeuristics)
+ return true;
+
+ OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti",
+ L->getHeader()->getFirstNonPHI());
+
+ if (LoopCost[0].NonPredCost > LoopCost[0].PredCost ||
+ LoopCost[1].NonPredCost >= LoopCost[1].PredCost) {
+ ORmissL << "No select conversion in the loop due to no reduction of loop's "
+ "critical path. ";
+ ORE->emit(ORmissL);
+ return false;
+ }
+
+ Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost,
+ LoopCost[1].PredCost - LoopCost[1].NonPredCost};
+
+  // Profitably converting to branches needs to reduce the loop's critical path
+ // by at least some threshold (absolute gain of GainCycleThreshold cycles and
+ // relative gain of 12.5%).
+ if (Gain[1] < Scaled64::get(GainCycleThreshold) ||
+ Gain[1] * Scaled64::get(GainRelativeThreshold) < LoopCost[1].PredCost) {
+ Scaled64 RelativeGain = Scaled64::get(100) * Gain[1] / LoopCost[1].PredCost;
+ ORmissL << "No select conversion in the loop due to small reduction of "
+ "loop's critical path. Gain="
+ << Gain[1].toString()
+ << ", RelativeGain=" << RelativeGain.toString() << "%. ";
+ ORE->emit(ORmissL);
+ return false;
+ }
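+  // Hedged numeric example: LoopCost[1].PredCost=100 and NonPredCost=97 give
+  // Gain[1]=3, failing both the absolute check (3 < GainCycleThreshold=4) and
+  // the relative one (3 * GainRelativeThreshold = 24 < 100), so conversion is
+  // rejected.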
+
+ // If the loop's critical path involves loop-carried dependences, the gradient
+ // of the gain needs to be at least GainGradientThreshold% (defaults to 25%).
+ // This check ensures that the latency reduction for the loop's critical path
+ // keeps decreasing with sufficient rate beyond the two analyzed loop
+ // iterations.
+ if (Gain[1] > Gain[0]) {
+ Scaled64 GradientGain = Scaled64::get(100) * (Gain[1] - Gain[0]) /
+ (LoopCost[1].PredCost - LoopCost[0].PredCost);
+ if (GradientGain < Scaled64::get(GainGradientThreshold)) {
+ ORmissL << "No select conversion in the loop due to small gradient gain. "
+ "GradientGain="
+ << GradientGain.toString() << "%. ";
+ ORE->emit(ORmissL);
+ return false;
+ }
+ }
+ // If the gain decreases it is not profitable to convert.
+ else if (Gain[1] < Gain[0]) {
+ ORmissL
+ << "No select conversion in the loop due to negative gradient gain. ";
+ ORE->emit(ORmissL);
+ return false;
+ }
+
+ // Non-predicated version of the loop is more profitable than its
+ // predicated version.
+ return true;
+}
+
+// Computes instruction and loop-critical-path costs for both the predicated
+// and non-predicated version of the given loop.
+// Returns false if unable to compute these costs due to invalid cost of loop
+// instruction(s).
+bool SelectOptimize::computeLoopCosts(
+ const Loop *L, const SelectGroups &SIGroups,
+ DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) {
+ const auto &SIset = getSIset(SIGroups);
+ // Compute instruction and loop-critical-path costs across two iterations for
+ // both predicated and non-predicated version.
+ const unsigned Iterations = 2;
+ for (unsigned Iter = 0; Iter < Iterations; ++Iter) {
+ // Cost of the loop's critical path.
+ CostInfo &MaxCost = LoopCost[Iter];
+ for (BasicBlock *BB : L->getBlocks()) {
+ for (const Instruction &I : *BB) {
+ if (I.isDebugOrPseudoInst())
+ continue;
+ // Compute the predicated and non-predicated cost of the instruction.
+ Scaled64 IPredCost = Scaled64::getZero(),
+ INonPredCost = Scaled64::getZero();
+
+ // Assume infinite resources that allow to fully exploit the available
+ // instruction-level parallelism.
+ // InstCost = InstLatency + max(Op1Cost, Op2Cost, … OpNCost)
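+        // Worked example (hypothetical latencies): for c = add a, b with
+        // latency 1, where a's path costs 3 cycles and b's costs 2, the
+        // critical-path cost of c is 1 + max(3, 2) = 4 cycles.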
+ for (const Use &U : I.operands()) {
+ auto UI = dyn_cast<Instruction>(U.get());
+ if (!UI)
+ continue;
+ if (InstCostMap.count(UI)) {
+ IPredCost = std::max(IPredCost, InstCostMap[UI].PredCost);
+ INonPredCost = std::max(INonPredCost, InstCostMap[UI].NonPredCost);
+ }
+ }
+ auto ILatency = computeInstCost(&I);
+ if (!ILatency) {
+ OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti", &I);
+ ORmissL << "Invalid instruction cost preventing analysis and "
+ "optimization of the inner-most loop containing this "
+ "instruction. ";
+ ORE->emit(ORmissL);
+ return false;
+ }
+ IPredCost += Scaled64::get(ILatency.getValue());
+ INonPredCost += Scaled64::get(ILatency.getValue());
+
+ // For a select that can be converted to branch,
+ // compute its cost as a branch (non-predicated cost).
+ //
+ // BranchCost = PredictedPathCost + MispredictCost
+ // PredictedPathCost = TrueOpCost * TrueProb + FalseOpCost * FalseProb
+ // MispredictCost = max(MispredictPenalty, CondCost) * MispredictRate
+ if (SIset.contains(&I)) {
+ auto SI = dyn_cast<SelectInst>(&I);
+
+ Scaled64 TrueOpCost = Scaled64::getZero(),
+ FalseOpCost = Scaled64::getZero();
+ if (auto *TI = dyn_cast<Instruction>(SI->getTrueValue()))
+ if (InstCostMap.count(TI))
+ TrueOpCost = InstCostMap[TI].NonPredCost;
+ if (auto *FI = dyn_cast<Instruction>(SI->getFalseValue()))
+ if (InstCostMap.count(FI))
+ FalseOpCost = InstCostMap[FI].NonPredCost;
+ Scaled64 PredictedPathCost =
+ getPredictedPathCost(TrueOpCost, FalseOpCost, SI);
+
+ Scaled64 CondCost = Scaled64::getZero();
+ if (auto *CI = dyn_cast<Instruction>(SI->getCondition()))
+ if (InstCostMap.count(CI))
+ CondCost = InstCostMap[CI].NonPredCost;
+ Scaled64 MispredictCost = getMispredictionCost(SI, CondCost);
+
+ INonPredCost = PredictedPathCost + MispredictCost;
+ }
+
+ InstCostMap[&I] = {IPredCost, INonPredCost};
+ MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost);
+ MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost);
+ }
+ }
+ }
+ return true;
+}
+
+SmallPtrSet<const Instruction *, 2>
+SelectOptimize::getSIset(const SelectGroups &SIGroups) {
+ SmallPtrSet<const Instruction *, 2> SIset;
+ for (const SelectGroup &ASI : SIGroups)
+ for (const SelectInst *SI : ASI)
+ SIset.insert(SI);
+ return SIset;
+}
+
+Optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) {
+ InstructionCost ICost =
+ TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
+ if (auto OC = ICost.getValue())
+ return Optional<uint64_t>(*OC);
+ return Optional<uint64_t>(None);
+}
+
+ScaledNumber<uint64_t>
+SelectOptimize::getMispredictionCost(const SelectInst *SI,
+ const Scaled64 CondCost) {
+ uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
+
+ // Account for the default misprediction rate when using a branch
+ // (conservatively set to 25% by default).
+ uint64_t MispredictRate = MispredictDefaultRate;
+ // If the select condition is obviously predictable, then the misprediction
+ // rate is zero.
+ if (isSelectHighlyPredictable(SI))
+ MispredictRate = 0;
+
+ // CondCost is included to account for cases where the computation of the
+ // condition is part of a long dependence chain (potentially loop-carried)
+ // that would delay detection of a misprediction and increase its cost.
+ Scaled64 MispredictCost =
+ std::max(Scaled64::get(MispredictPenalty), CondCost) *
+ Scaled64::get(MispredictRate);
+ MispredictCost /= Scaled64::get(100);
+
+ return MispredictCost;
+}
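+// Hedged numeric example for the formula above: with a (hypothetical)
+// MispredictPenalty of 14 cycles, a cheap condition (CondCost < 14), and the
+// default 25% rate, MispredictCost = max(14, CondCost) * 25 / 100 = 3.5.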
+
+// Returns the cost of a branch when the prediction is correct.
+// TrueCost * TrueProbability + FalseCost * FalseProbability.
+ScaledNumber<uint64_t>
+SelectOptimize::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
+ const SelectInst *SI) {
+ Scaled64 PredPathCost;
+ uint64_t TrueWeight, FalseWeight;
+ if (SI->extractProfMetadata(TrueWeight, FalseWeight)) {
+ uint64_t SumWeight = TrueWeight + FalseWeight;
+ if (SumWeight != 0) {
+ PredPathCost = TrueCost * Scaled64::get(TrueWeight) +
+ FalseCost * Scaled64::get(FalseWeight);
+ PredPathCost /= Scaled64::get(SumWeight);
+ return PredPathCost;
+ }
+ }
+  // Without branch weight metadata, we assume 75% for one path and 25% for
+ // the other, and pick the result with the biggest cost.
+ PredPathCost = std::max(TrueCost * Scaled64::get(3) + FalseCost,
+ FalseCost * Scaled64::get(3) + TrueCost);
+ PredPathCost /= Scaled64::get(4);
+ return PredPathCost;
+}
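+// Hedged example for getPredictedPathCost above: without profile data,
+// TrueCost=2 and FalseCost=6 give max(2*3 + 6, 6*3 + 2) / 4 = 20 / 4 = 5
+// cycles, i.e. the more expensive path is weighted 75%.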
+
+bool SelectOptimize::isSelectKindSupported(SelectInst *SI) {
+ bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
+ if (VectorCond)
+ return false;
+ TargetLowering::SelectSupportKind SelectKind;
+ if (SI->getType()->isVectorTy())
+ SelectKind = TargetLowering::ScalarCondVectorVal;
+ else
+ SelectKind = TargetLowering::ScalarValSelect;
+ return TLI->isSelectSupported(SelectKind);
+}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ec297579090e..aa688d9dda3c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -35,7 +35,6 @@
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
@@ -52,7 +51,6 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
@@ -426,6 +424,7 @@ namespace {
SDValue visitREM(SDNode *N);
SDValue visitMULHU(SDNode *N);
SDValue visitMULHS(SDNode *N);
+ SDValue visitAVG(SDNode *N);
SDValue visitSMUL_LOHI(SDNode *N);
SDValue visitUMUL_LOHI(SDNode *N);
SDValue visitMULO(SDNode *N);
@@ -511,6 +510,7 @@ namespace {
SDValue visitMSCATTER(SDNode *N);
SDValue visitFP_TO_FP16(SDNode *N);
SDValue visitFP16_TO_FP(SDNode *N);
+ SDValue visitFP_TO_BF16(SDNode *N);
SDValue visitVECREDUCE(SDNode *N);
SDValue visitVPOp(SDNode *N);
@@ -520,7 +520,9 @@ namespace {
SDValue XformToShuffleWithZero(SDNode *N);
bool reassociationCanBreakAddressingModePattern(unsigned Opc,
- const SDLoc &DL, SDValue N0,
+ const SDLoc &DL,
+ SDNode *N,
+ SDValue N0,
SDValue N1);
SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
SDValue N1);
@@ -570,6 +572,8 @@ namespace {
SDValue BuildSDIV(SDNode *N);
SDValue BuildSDIVPow2(SDNode *N);
SDValue BuildUDIV(SDNode *N);
+ SDValue BuildSREMPow2(SDNode *N);
+ SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
@@ -583,11 +587,11 @@ namespace {
bool DemandHighBits = true);
SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
- SDValue InnerPos, SDValue InnerNeg,
+ SDValue InnerPos, SDValue InnerNeg, bool HasPos,
unsigned PosOpcode, unsigned NegOpcode,
const SDLoc &DL);
SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
- SDValue InnerPos, SDValue InnerNeg,
+ SDValue InnerPos, SDValue InnerNeg, bool HasPos,
unsigned PosOpcode, unsigned NegOpcode,
const SDLoc &DL);
SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
@@ -665,9 +669,8 @@ namespace {
/// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
/// MulNode is the original multiply, AddNode is (add x, c1),
/// and ConstNode is c2.
- bool isMulAddWithConstProfitable(SDNode *MulNode,
- SDValue &AddNode,
- SDValue &ConstNode);
+ bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
+ SDValue ConstNode);
/// This is a helper function for visitAND and visitZERO_EXTEND. Returns
/// true if the (and (load x) c) pattern matches an extload. ExtVT returns
@@ -880,8 +883,8 @@ void DAGCombiner::deleteAndRecombine(SDNode *N) {
// We provide an Offset so that we can create bitwidths that won't overflow.
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
- LHS = LHS.zextOrSelf(Bits);
- RHS = RHS.zextOrSelf(Bits);
+ LHS = LHS.zext(Bits);
+ RHS = RHS.zext(Bits);
}
// Return true if this node is a setcc, or is a select_cc
@@ -926,7 +929,7 @@ bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
/// it is profitable to do so.
bool DAGCombiner::isOneUseSetCC(SDValue N) const {
SDValue N0, N1, N2;
- if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
+ if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
return true;
return false;
}
@@ -996,6 +999,7 @@ static bool canSplitIdx(LoadSDNode *LD) {
bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
const SDLoc &DL,
+ SDNode *N,
SDValue N0,
SDValue N1) {
// Currently this only tries to ensure we don't undo the GEP splits done by
@@ -1004,33 +1008,62 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
// (load/store (add, (add, x, offset1), offset2)) ->
// (load/store (add, x, offset1+offset2)).
- if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
- return false;
+ // (load/store (add, (add, x, y), offset2)) ->
+ // (load/store (add, (add, x, offset2), y)).
- if (N0.hasOneUse())
+ if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
return false;
- auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
auto *C2 = dyn_cast<ConstantSDNode>(N1);
- if (!C1 || !C2)
+ if (!C2)
return false;
- const APInt &C1APIntVal = C1->getAPIntValue();
const APInt &C2APIntVal = C2->getAPIntValue();
- if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
+ if (C2APIntVal.getSignificantBits() > 64)
return false;
- const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
- if (CombinedValueIntVal.getBitWidth() > 64)
- return false;
- const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
-
- for (SDNode *Node : N0->uses()) {
- auto LoadStore = dyn_cast<MemSDNode>(Node);
- if (LoadStore) {
- // Is x[offset2] already not a legal addressing mode? If so then
- // reassociating the constants breaks nothing (we test offset2 because
- // that's the one we hope to fold into the load or store).
+ if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ if (N0.hasOneUse())
+ return false;
+
+ const APInt &C1APIntVal = C1->getAPIntValue();
+ const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
+ if (CombinedValueIntVal.getSignificantBits() > 64)
+ return false;
+ const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
+
+ for (SDNode *Node : N->uses()) {
+ if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
+ // Is x[offset2] already not a legal addressing mode? If so then
+ // reassociating the constants breaks nothing (we test offset2 because
+ // that's the one we hope to fold into the load or store).
+ TargetLoweringBase::AddrMode AM;
+ AM.HasBaseReg = true;
+ AM.BaseOffs = C2APIntVal.getSExtValue();
+ EVT VT = LoadStore->getMemoryVT();
+ unsigned AS = LoadStore->getAddressSpace();
+ Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
+ if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
+ continue;
+
+ // Would x[offset1+offset2] still be a legal addressing mode?
+ AM.BaseOffs = CombinedValue;
+ if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
+ return true;
+ }
+ }
+ } else {
+ if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
+ if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
+ return false;
+
+ for (SDNode *Node : N->uses()) {
+ auto *LoadStore = dyn_cast<MemSDNode>(Node);
+ if (!LoadStore)
+ return false;
+
+ // Is x[offset2] a legal addressing mode? If so then
+      // reassociating the constants breaks the address pattern.
TargetLoweringBase::AddrMode AM;
AM.HasBaseReg = true;
AM.BaseOffs = C2APIntVal.getSExtValue();
@@ -1038,13 +1071,9 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
unsigned AS = LoadStore->getAddressSpace();
Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
- continue;
-
- // Would x[offset1+offset2] still be a legal addressing mode?
- AM.BaseOffs = CombinedValue;
- if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
- return true;
+ return false;
}
+ return true;
}
return false;
@@ -1072,11 +1101,51 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
if (TLI.isReassocProfitable(DAG, N0, N1)) {
// Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
// iff (op x, c1) has one use
- if (SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1))
- return DAG.getNode(Opc, DL, VT, OpNode, N01);
- return SDValue();
+ SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1);
+ return DAG.getNode(Opc, DL, VT, OpNode, N01);
+ }
+ }
+
+ // Check for repeated operand logic simplifications.
+ if (Opc == ISD::AND || Opc == ISD::OR) {
+ // (N00 & N01) & N00 --> N00 & N01
+ // (N00 & N01) & N01 --> N00 & N01
+ // (N00 | N01) | N00 --> N00 | N01
+ // (N00 | N01) | N01 --> N00 | N01
+ if (N1 == N00 || N1 == N01)
+ return N0;
+ }
+ if (Opc == ISD::XOR) {
+ // (N00 ^ N01) ^ N00 --> N01
+ if (N1 == N00)
+ return N01;
+ // (N00 ^ N01) ^ N01 --> N00
+ if (N1 == N01)
+ return N00;
+ }
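+  // E.g. with N0 = (xor a, b) and N1 = a, the rule above returns b directly,
+  // since (a ^ b) ^ a == b.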
+
+ if (TLI.isReassocProfitable(DAG, N0, N1)) {
+ if (N1 != N01) {
+ // Reassociate if (op N00, N1) already exist
+ if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
+        // If Op(Op(N00, N1), N01) already exists,
+        // we need to stop reassociating to avoid an infinite loop.
+ if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
+ return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
+ }
+ }
+
+ if (N1 != N00) {
+ // Reassociate if (op N01, N1) already exist
+ if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
+        // If Op(Op(N01, N1), N00) already exists,
+        // we need to stop reassociating to avoid an infinite loop.
+ if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
+ return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
+ }
}
}
+
return SDValue();
}
@@ -1103,7 +1172,7 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
++NodesCombined;
LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
- To[0].getNode()->dump(&DAG);
+ To[0].dump(&DAG);
dbgs() << " and " << NumTo - 1 << " other values\n");
for (unsigned i = 0, e = NumTo; i != e; ++i)
assert((!To[i].getNode() ||
@@ -1115,10 +1184,8 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
if (AddTo) {
// Push the new nodes and any users onto the worklist
for (unsigned i = 0, e = NumTo; i != e; ++i) {
- if (To[i].getNode()) {
- AddToWorklist(To[i].getNode());
- AddUsersToWorklist(To[i].getNode());
- }
+ if (To[i].getNode())
+ AddToWorklistWithUsers(To[i].getNode());
}
}
@@ -1134,9 +1201,8 @@ void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
// Replace the old value with the new one.
++NodesCombined;
- LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
- dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
- dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
+ dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
// Replace all uses. If any nodes become isomorphic to other nodes and
// are deleted, make sure to remove them from our worklist.
@@ -1149,7 +1215,7 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
// Finally, if the node is now dead, remove it from the graph. The node
// may not be dead if the replacement process recursively simplified to
// something else needing this node.
- if (TLO.Old.getNode()->use_empty())
+ if (TLO.Old->use_empty())
deleteAndRecombine(TLO.Old.getNode());
}
@@ -1196,7 +1262,7 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
- Trunc.getNode()->dump(&DAG); dbgs() << '\n');
+ Trunc.dump(&DAG); dbgs() << '\n');
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
@@ -1295,7 +1361,7 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
- LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
bool Replace0 = false;
SDValue N0 = Op.getOperand(0);
@@ -1322,7 +1388,7 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
// If operands have a use ordering, make sure we deal with
// predecessor first.
- if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
+ if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
std::swap(N0, N1);
std::swap(NN0, NN1);
}
@@ -1363,11 +1429,10 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
- LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
bool Replace = false;
SDValue N0 = Op.getOperand(0);
- SDValue N1 = Op.getOperand(1);
if (Opc == ISD::SRA)
N0 = SExtPromoteOperand(N0, PVT);
else if (Opc == ISD::SRL)
@@ -1379,6 +1444,7 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
return SDValue();
SDLoc DL(Op);
+ SDValue N1 = Op.getOperand(1);
SDValue RV =
DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
@@ -1414,7 +1480,7 @@ SDValue DAGCombiner::PromoteExtend(SDValue Op) {
// fold (aext (aext x)) -> (aext x)
// fold (aext (zext x)) -> (zext x)
// fold (aext (sext x)) -> (sext x)
- LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
}
return SDValue();
@@ -1455,7 +1521,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
- Result.getNode()->dump(&DAG); dbgs() << '\n');
+ Result.dump(&DAG); dbgs() << '\n');
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
@@ -1569,9 +1635,9 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
RV.getOpcode() != ISD::DELETED_NODE &&
"Node was deleted but visit returned new node!");
- LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
+ LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
- if (N->getNumValues() == RV.getNode()->getNumValues())
+ if (N->getNumValues() == RV->getNumValues())
DAG.ReplaceAllUsesWith(N, RV.getNode());
else {
assert(N->getValueType(0) == RV.getValueType() &&
@@ -1635,6 +1701,10 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::UREM: return visitREM(N);
case ISD::MULHU: return visitMULHU(N);
case ISD::MULHS: return visitMULHS(N);
+ case ISD::AVGFLOORS:
+ case ISD::AVGFLOORU:
+ case ISD::AVGCEILS:
+ case ISD::AVGCEILU: return visitAVG(N);
case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
case ISD::SMULO:
@@ -1724,6 +1794,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::LIFETIME_END: return visitLIFETIME_END(N);
case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
+ case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
case ISD::FREEZE: return visitFREEZE(N);
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMUL:
@@ -2072,8 +2143,9 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
return false;
VT = ST->getMemoryVT();
AS = ST->getAddressSpace();
- } else
+ } else {
return false;
+ }
TargetLowering::AddrMode AM;
if (N->getOpcode() == ISD::ADD) {
@@ -2094,8 +2166,9 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
else
// [reg +/- reg]
AM.Scale = 1;
- } else
+ } else {
return false;
+ }
return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
VT.getTypeForEVT(*DAG.getContext()), AS);
@@ -2139,6 +2212,18 @@ static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
return C->isExactlyValue(1.0);
}
}
+ if (ConstantSDNode *C = isConstOrConstSplat(V)) {
+ switch (Opcode) {
+ case ISD::ADD: // X + 0 --> X
+ case ISD::SUB: // X - 0 --> X
+ case ISD::SHL: // X << 0 --> X
+ case ISD::SRA: // X s>> 0 --> X
+ case ISD::SRL: // X u>> 0 --> X
+ return C->isZero();
+ case ISD::MUL: // X * 1 --> X
+ return C->isOne();
+ }
+ }
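+  // Hedged example: since 0 is the identity for ADD, a node such as
+  // (add X, (select Cond, Y, 0)) can be rewritten as
+  // (select Cond, (add X, Y), X), removing the add from one path.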
return false;
};
@@ -2316,6 +2401,15 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+static bool isADDLike(SDValue V, const SelectionDAG &DAG) {
+ unsigned Opcode = V.getOpcode();
+ if (Opcode == ISD::OR)
+ return DAG.haveNoCommonBitsSet(V.getOperand(0), V.getOperand(1));
+ if (Opcode == ISD::XOR)
+ return isMinSignedConstant(V.getOperand(1));
+ return false;
+}
+
/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
/// are no common bits set in the operands).
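Both isADDLike cases are carry-free additions: disjoint operands of an OR cannot generate a carry, and XOR with the minimum signed value only flips the top bit, which an ADD of that constant would also do (any carry out of the sign bit is discarded). A standalone sketch (plain C++, not LLVM code) checking both identities exhaustively at 8 bits:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned x = 0; x < 256; ++x) {
    // (or a, b) == (add a, b) when a and b share no set bits.
    uint8_t a = x & 0xF0, b = x & 0x0F;
    assert(uint8_t(a | b) == uint8_t(a + b));
    // (xor x, SMIN) == (add x, SMIN): adding the sign-bit constant can
    // neither carry into nor borrow from the lower bits.
    assert(uint8_t(x ^ 0x80) == uint8_t(x + 0x80));
  }
  return 0;
}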
@@ -2354,66 +2448,60 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
if (isNullConstant(N1))
return N0;
- if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
+ if (N0.getOpcode() == ISD::SUB) {
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+
// fold ((A-c1)+c2) -> (A+(c2-c1))
- if (N0.getOpcode() == ISD::SUB &&
- isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
- SDValue Sub =
- DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
- assert(Sub && "Constant folding failed");
+ if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
- }
// fold ((c1-A)+c2) -> (c1+c2)-A
- if (N0.getOpcode() == ISD::SUB &&
- isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
- SDValue Add =
- DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
- assert(Add && "Constant folding failed");
+ if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
- }
+ }
- // add (sext i1 X), 1 -> zext (not i1 X)
- // We don't transform this pattern:
- // add (zext i1 X), -1 -> sext (not i1 X)
- // because most (?) targets generate better code for the zext form.
- if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
- isOneOrOneSplat(N1)) {
- SDValue X = N0.getOperand(0);
- if ((!LegalOperations ||
- (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
- TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
- X.getScalarValueSizeInBits() == 1) {
- SDValue Not = DAG.getNOT(DL, X, X.getValueType());
- return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
- }
+ // add (sext i1 X), 1 -> zext (not i1 X)
+ // We don't transform this pattern:
+ // add (zext i1 X), -1 -> sext (not i1 X)
+ // because most (?) targets generate better code for the zext form.
+ if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
+ isOneOrOneSplat(N1)) {
+ SDValue X = N0.getOperand(0);
+ if ((!LegalOperations ||
+ (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
+ TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
+ X.getScalarValueSizeInBits() == 1) {
+ SDValue Not = DAG.getNOT(DL, X, X.getValueType());
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
}
+ }
- // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is
- // equivalent to (add x, c0).
- if (N0.getOpcode() == ISD::OR &&
- isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
- DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
- if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT,
- {N1, N0.getOperand(1)}))
- return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
- }
+ // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
+ // iff (or x, c0) is equivalent to (add x, c0).
+ // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
+ // iff (xor x, c0) is equivalent to (add x, c0).
+ if (isADDLike(N0, DAG)) {
+ SDValue N01 = N0.getOperand(1);
+ if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
}
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// reassociate add
- if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
+ if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
return RADD;
// Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
// equivalent to (add x, c).
+ // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
+ // equivalent to (add x, c).
auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
- if (N0.getOpcode() == ISD::OR && N0.hasOneUse() &&
- isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
- DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
+ if (isADDLike(N0, DAG) && N0.hasOneUse() &&
+ isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
return DAG.getNode(ISD::ADD, DL, VT,
DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
N0.getOperand(1));
@@ -2473,7 +2561,8 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
N1.getOperand(1));
// fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
- if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
+ if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
+ N0->hasOneUse() && N1->hasOneUse()) {
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
SDValue N10 = N1.getOperand(0);
@@ -2526,8 +2615,8 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
// add (add x, y), 1
// And if the target does not like this form then turn into:
// sub y, (xor x, -1)
- if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
- N0.getOpcode() == ISD::ADD) {
+ if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
+ N0.hasOneUse()) {
SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
DAG.getAllOnesConstant(DL, VT));
return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
@@ -2535,7 +2624,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
}
// (x - y) + -1 -> add (xor y, -1), x
- if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+ if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
isAllOnesOrAllOnesSplat(N1)) {
SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
@@ -2632,7 +2721,8 @@ SDValue DAGCombiner::visitADDSAT(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- // TODO SimplifyVBinOp
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
// fold (add_sat x, 0) -> x, vector edition
if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
@@ -2678,7 +2768,7 @@ static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
return SDValue();
- EVT VT = V.getNode()->getValueType(0);
+ EVT VT = V->getValueType(0);
if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
return SDValue();
@@ -2731,27 +2821,27 @@ SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
// add (add x, 1), y
// And if the target does not like this form then turn into:
// sub y, (xor x, -1)
- if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
- N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
+ if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
+ N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1))) {
SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
DAG.getAllOnesConstant(DL, VT));
return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
}
- // Hoist one-use subtraction by non-opaque constant:
- // (x - C) + y -> (x + y) - C
- // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
- if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
- isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
- SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
- return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
- }
- // Hoist one-use subtraction from non-opaque constant:
- // (C - x) + y -> (y - x) + C
- if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
- isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
- SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
- return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
+ if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
+ // Hoist one-use subtraction by non-opaque constant:
+ // (x - C) + y -> (x + y) - C
+ // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
+ if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
+ return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
+ }
+ // Hoist one-use subtraction from non-opaque constant:
+ // (C - x) + y -> (y - x) + C
+ if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
+ return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
+ }
}
// If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
@@ -3127,21 +3217,26 @@ static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
// Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
// a single path for carry/borrow out propagation:
static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
- SDValue Carry0, SDValue Carry1, SDNode *N) {
- if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
+ SDValue N0, SDValue N1, SDNode *N) {
+ SDValue Carry0 = getAsCarry(TLI, N0);
+ if (!Carry0)
return SDValue();
+ SDValue Carry1 = getAsCarry(TLI, N1);
+ if (!Carry1)
+ return SDValue();
+
unsigned Opcode = Carry0.getOpcode();
if (Opcode != Carry1.getOpcode())
return SDValue();
if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
return SDValue();
- // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
- // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
- // the above ASCII art.)
- if (Carry1.getOperand(0) != Carry0.getValue(0) &&
- Carry1.getOperand(1) != Carry0.getValue(0))
+ // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
+ // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
+ if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
std::swap(Carry0, Carry1);
+
+  // Check that the nodes are connected in the expected way.
if (Carry1.getOperand(0) != Carry0.getValue(0) &&
Carry1.getOperand(1) != Carry0.getValue(0))
return SDValue();
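The diamond being matched computes A + B + CarryIn with two uaddo nodes and ORs their carries. Rewriting it to a single ADDCARRY is valid because the two partial carries can never both be set, so their OR is exactly the carry-out of the full addition. A standalone sketch (plain C++, not LLVM code) verifying that at 8 bits:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned a = 0; a < 256; ++a)
    for (unsigned b = 0; b < 256; ++b)
      for (unsigned cin = 0; cin < 2; ++cin) {
        uint8_t s1 = uint8_t(a + b);
        bool c1 = s1 < a;                     // carry of the top uaddo
        uint8_t s2 = uint8_t(s1 + cin);
        bool c2 = s2 < s1;                    // carry of the middle uaddo
        bool carryOut = (a + b + cin) > 0xFF; // carry of one addcarry
        assert(!(c1 && c2));                  // never both set
        assert((c1 | c2) == carryOut);
      }
  return 0;
}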
@@ -3321,9 +3416,15 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
EVT VT = N0.getValueType();
SDLoc DL(N);
+ auto PeekThroughFreeze = [](SDValue N) {
+ if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
+ return N->getOperand(0);
+ return N;
+ };
+
// fold (sub x, x) -> 0
// FIXME: Refactor this and xor and other similar operations together.
- if (N0 == N1)
+ if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
// fold (sub c1, c2) -> c3
@@ -3381,7 +3482,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
// Convert 0 - abs(x).
- if (N1->getOpcode() == ISD::ABS &&
+ if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
!TLI.isOperationLegalOrCustom(ISD::ABS, VT))
if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
return Result;
@@ -3419,44 +3520,31 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
return N0.getOperand(0);
// fold (A+C1)-C2 -> A+(C1-C2)
- if (N0.getOpcode() == ISD::ADD &&
- isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
- isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
- SDValue NewC =
- DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
- assert(NewC && "Constant folding failed");
- return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
+ if (N0.getOpcode() == ISD::ADD) {
+ SDValue N01 = N0.getOperand(1);
+ if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
}
// fold C2-(A+C1) -> (C2-C1)-A
if (N1.getOpcode() == ISD::ADD) {
SDValue N11 = N1.getOperand(1);
- if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
- isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
- SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
- assert(NewC && "Constant folding failed");
+ if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
- }
}
// fold (A-C1)-C2 -> A-(C1+C2)
- if (N0.getOpcode() == ISD::SUB &&
- isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
- isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
- SDValue NewC =
- DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
- assert(NewC && "Constant folding failed");
- return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
+ if (N0.getOpcode() == ISD::SUB) {
+ SDValue N01 = N0.getOperand(1);
+ if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
+ return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
}
// fold (c1-A)-c2 -> (c1-c2)-A
- if (N0.getOpcode() == ISD::SUB &&
- isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
- isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
- SDValue NewC =
- DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
- assert(NewC && "Constant folding failed");
- return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
+ if (N0.getOpcode() == ISD::SUB) {
+ SDValue N00 = N0.getOperand(0);
+ if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
+ return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
}
// fold ((A+(B+or-C))-B) -> A+or-C
@@ -3651,6 +3739,15 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
}
+ // As with the previous fold, prefer add for more folding potential.
+ // Subtracting SMIN/0 is the same as adding SMIN/0:
+ // N0 - (X << BW-1) --> N0 + (X << BW-1)
+ if (N1.getOpcode() == ISD::SHL) {
+ ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
+ if (ShlC && ShlC->getAPIntValue() == VT.getScalarSizeInBits() - 1)
+ return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
+ }
+
if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
// (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
if (SDValue Carry = getAsCarry(TLI, N0)) {
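The new shift fold works because X << (BW-1) can only take the values 0 or SMIN, and both are their own two's-complement negation, so subtracting the shift is the same as adding it. A standalone sketch (plain C++, not LLVM code) at 8 bits:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned n = 0; n < 256; ++n) {
      uint8_t v = uint8_t(x << 7); // only 0x00 or 0x80 are possible
      assert(v == 0x00 || v == 0x80);
      // N0 - (X << BW-1) == N0 + (X << BW-1)
      assert(uint8_t(n - v) == uint8_t(n + v));
    }
  return 0;
}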
@@ -3686,7 +3783,8 @@ SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- // TODO SimplifyVBinOp
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
// fold (sub_sat x, 0) -> x, vector edition
if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
@@ -3837,19 +3935,20 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
+ SDLoc DL(N);
// fold (mul x, undef) -> 0
if (N0.isUndef() || N1.isUndef())
- return DAG.getConstant(0, SDLoc(N), VT);
+ return DAG.getConstant(0, DL, VT);
// fold (mul c1, c2) -> c1*c2
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
return C;
// canonicalize constant to RHS (vector doesn't have to splat)
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
+ return DAG.getNode(ISD::MUL, DL, VT, N1, N0);
bool N1IsConst = false;
bool N1IsOpaqueConst = false;
@@ -3857,7 +3956,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
@@ -3884,17 +3983,14 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
return NewSel;
// fold (mul x, -1) -> 0-x
- if (N1IsConst && ConstValue1.isAllOnes()) {
- SDLoc DL(N);
+ if (N1IsConst && ConstValue1.isAllOnes())
return DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(0, DL, VT), N0);
- }
// fold (mul x, (1 << c)) -> x << c
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
DAG.isKnownToBeAPowerOfTwo(N1) &&
(!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
- SDLoc DL(N);
SDValue LogBase2 = BuildLogBase2(N1, DL);
EVT ShiftVT = getShiftAmountTy(N0.getValueType());
SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
@@ -3904,7 +4000,6 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
unsigned Log2Val = (-ConstValue1).logBase2();
- SDLoc DL(N);
// FIXME: If the input is something that is easily negated (e.g. a
// single-use add), we should put the negate there.
return DAG.getNode(ISD::SUB, DL, VT,
@@ -3949,7 +4044,6 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
ShAmt += TZeros;
assert(ShAmt < VT.getScalarSizeInBits() &&
"multiply-by-constant generated out of bounds shift");
- SDLoc DL(N);
SDValue Shl =
DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
SDValue R =
@@ -3964,12 +4058,10 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
}
// (mul (shl X, c1), c2) -> (mul X, c2 << c1)
- if (N0.getOpcode() == ISD::SHL &&
- isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
- isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
- SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
- if (isConstantOrConstantVector(C3))
- return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
+ if (N0.getOpcode() == ISD::SHL) {
+ SDValue N01 = N0.getOperand(1);
+ if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
+ return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
}
// Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
@@ -3979,18 +4071,17 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
if (N0.getOpcode() == ISD::SHL &&
- isConstantOrConstantVector(N0.getOperand(1)) &&
- N0.getNode()->hasOneUse()) {
+ isConstantOrConstantVector(N0.getOperand(1)) && N0->hasOneUse()) {
Sh = N0; Y = N1;
} else if (N1.getOpcode() == ISD::SHL &&
isConstantOrConstantVector(N1.getOperand(1)) &&
- N1.getNode()->hasOneUse()) {
+ N1->hasOneUse()) {
Sh = N1; Y = N0;
}
if (Sh.getNode()) {
- SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
- return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
+ return DAG.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
}
}
@@ -3999,18 +4090,17 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
N0.getOpcode() == ISD::ADD &&
DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
isMulAddWithConstProfitable(N, N0, N1))
- return DAG.getNode(ISD::ADD, SDLoc(N), VT,
- DAG.getNode(ISD::MUL, SDLoc(N0), VT,
- N0.getOperand(0), N1),
- DAG.getNode(ISD::MUL, SDLoc(N1), VT,
- N0.getOperand(1), N1));
+ return DAG.getNode(
+ ISD::ADD, DL, VT,
+ DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
+ DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
// Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
if (N0.getOpcode() == ISD::VSCALE)
if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
const APInt &C0 = N0.getConstantOperandAPInt(0);
const APInt &C1 = NC1->getAPIntValue();
- return DAG.getVScale(SDLoc(N), VT, C0 * C1);
+ return DAG.getVScale(DL, VT, C0 * C1);
}
// Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
@@ -4019,7 +4109,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
const APInt &C0 = N0.getConstantOperandAPInt(0);
APInt NewStep = C0 * MulVal;
- return DAG.getStepVector(SDLoc(N), VT, NewStep);
+ return DAG.getStepVector(DL, VT, NewStep);
}
// Fold ((mul x, 0/undef) -> 0,
@@ -4041,7 +4131,6 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
- SDLoc DL(N);
EVT LegalSVT = N1.getOperand(0).getValueType();
SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
@@ -4054,7 +4143,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
}
// reassociate mul
- if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
+ if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
return RMUL;
return SDValue();
@@ -4117,7 +4206,7 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) {
SDValue Op0 = Node->getOperand(0);
SDValue Op1 = Node->getOperand(1);
SDValue combined;
- for (SDNode *User : Op0.getNode()->uses()) {
+ for (SDNode *User : Op0->uses()) {
if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
User->use_empty())
continue;
@@ -4257,12 +4346,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
- SDLoc DL(N);
- EVT VT = N->getValueType(0);
- EVT CCVT = getSetCCResultType(VT);
- unsigned BitWidth = VT.getScalarSizeInBits();
-
+static bool isDivisorPowerOfTwo(SDValue Divisor) {
// Helper for determining whether a value is a power-2 constant scalar or a
// vector of such elements.
auto IsPowerOfTwo = [](ConstantSDNode *C) {
@@ -4275,11 +4359,20 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
return false;
};
+ return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
+}
+
+SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ EVT CCVT = getSetCCResultType(VT);
+ unsigned BitWidth = VT.getScalarSizeInBits();
+
// fold (sdiv X, pow2) -> simple ops after legalize
// FIXME: We check for the exact bit here because the generic lowering gives
// better results in that case. The target-specific lowering should learn how
// to handle exact sdivs efficiently.
- if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
+ if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
// Target-specific implementation of sdiv x, pow2.
if (SDValue Res = BuildSDIVPow2(N))
return Res;
@@ -4435,6 +4528,16 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
+ if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
+ !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
+ // Target-specific implementation of srem x, pow2.
+ if (SDValue Res = BuildSREMPow2(N))
+ return Res;
+ }
+ return SDValue();
+}
+
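BuildSREMPow2 is a target hook, but the reason srem by a power of two is worth special-casing is that it needs no divide at all. One classic division-free expansion, as a standalone sketch (plain C++, not LLVM code; sremPow2 is an illustrative name, and x >> 63 is assumed to be an arithmetic shift):

#include <cassert>
#include <cstdint>

// Signed remainder by 2^k with truncating (ISD::SREM / C) semantics.
int64_t sremPow2(int64_t x, unsigned k) {
  uint64_t mask = (uint64_t(1) << k) - 1;
  uint64_t bias = uint64_t(x >> 63) & mask; // 2^k-1 if x < 0, else 0
  // Bias negative numerators up so the masking rounds toward zero.
  return x - int64_t((uint64_t(x) + bias) & ~mask);
}

int main() {
  for (int64_t x = -1000; x <= 1000; ++x)
    for (unsigned k = 0; k < 8; ++k)
      assert(sremPow2(x, k) == x % (int64_t(1) << k));
  return 0;
}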
// handles ISD::SREM and ISD::UREM
SDValue DAGCombiner::visitREM(SDNode *N) {
unsigned Opcode = N->getOpcode();
@@ -4451,10 +4554,13 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
return C;
- // fold (urem X, -1) -> select(X == -1, 0, x)
- if (!isSigned && N1C && N1C->isAllOnes())
- return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
- DAG.getConstant(0, DL, VT), N0);
+  // fold (urem X, -1) -> select(FX == -1, 0, FX), where FX = freeze(X)
+  // Freeze the numerator to avoid a miscompile with an undefined value.
+ if (!isSigned && N1C && N1C->isAllOnes()) {
+ SDValue F0 = DAG.getFreeze(N0);
+ SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
+ return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
+ }
if (SDValue V = simplifyDivRem(N, DAG))
return V;
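Without the freeze, the fold would use X twice, and an undef X could take a different value at each use: the compare might see -1 while the select arm returns some other value, which urem can never produce. Freezing pins one value for both uses. The scalar identity itself, as a standalone sketch (plain C++, not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned x = 0; x < 256; ++x) {
    uint8_t fx = uint8_t(x);                // stands in for freeze(X)
    uint8_t folded = (fx == 0xFF) ? 0 : fx; // select(FX == -1, 0, FX)
    assert(folded == uint8_t(fx % 0xFF));   // urem FX, -1
  }
  return 0;
}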
@@ -4495,6 +4601,12 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
// combine will not return a DIVREM. Regardless, checking cheapness here
// makes sense since the simplification results in fatter code.
if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
+ if (isSigned) {
+      // Check if we can build a faster implementation for srem.
+ if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
+ return OptimizedRem;
+ }
+
SDValue OptimizedDiv =
isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
@@ -4654,6 +4766,46 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitAVG(SDNode *N) {
+ unsigned Opcode = N->getOpcode();
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+  // fold (avg c1, c2) -> c3
+ if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
+ return C;
+
+ // canonicalize constant to RHS.
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
+
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
+ // fold (avgfloor x, 0) -> x >> 1
+ if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
+ if (Opcode == ISD::AVGFLOORS)
+ return DAG.getNode(ISD::SRA, DL, VT, N0, DAG.getConstant(1, DL, VT));
+ if (Opcode == ISD::AVGFLOORU)
+ return DAG.getNode(ISD::SRL, DL, VT, N0, DAG.getConstant(1, DL, VT));
+ }
+ }
+
+ // fold (avg x, undef) -> x
+ if (N0.isUndef())
+ return N1;
+ if (N1.isUndef())
+ return N0;
+
+ // TODO If we use avg for scalars anywhere, we can add (avgfl x, 0) -> x >> 1
+
+ return SDValue();
+}
+
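The AVG nodes compute the average with one bit of extra implicit precision. The well-known carry-free identities make their semantics concrete and show why (avgfloor x, 0) collapses to a single shift, as in the fold above. A standalone sketch (plain C++, not LLVM code; function names are illustrative):

#include <cassert>
#include <cstdint>

int8_t  avgfloors(int8_t a, int8_t b)   { return (a & b) + ((a ^ b) >> 1); }
uint8_t avgflooru(uint8_t a, uint8_t b) { return (a & b) + ((a ^ b) >> 1); }
uint8_t avgceilu(uint8_t a, uint8_t b)  { return (a | b) - ((a ^ b) >> 1); }

int main() {
  for (int a = -128; a < 128; ++a)
    assert(avgfloors(int8_t(a), 0) == int8_t(a) >> 1);   // sra by 1
  for (int a = 0; a < 256; ++a) {
    assert(avgflooru(uint8_t(a), 0) == uint8_t(a) >> 1); // srl by 1
    assert(avgceilu(uint8_t(a), 0) == uint8_t((a + 1) / 2));
  }
  return 0;
}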
/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
/// give the opcodes for the two computations that are being performed. Return
/// true if a simplification was made.
@@ -4812,7 +4964,9 @@ SDValue DAGCombiner::visitMULO(SDNode *N) {
DAG.getConstant(0, DL, CarryVT));
// (mulo x, 2) -> (addo x, x)
- if (N1C && N1C->getAPIntValue() == 2)
+ // FIXME: This needs a freeze.
+ if (N1C && N1C->getAPIntValue() == 2 &&
+ (!IsSigned || VT.getScalarSizeInBits() > 2))
return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
N->getVTList(), N0, N0);
@@ -4869,8 +5023,7 @@ static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
return 0;
const APInt &C1 = N1C->getAPIntValue();
const APInt &C2 = N3C->getAPIntValue();
- if (C1.getBitWidth() < C2.getBitWidth() ||
- C1 != C2.sextOrSelf(C1.getBitWidth()))
+ if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
return 0;
return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
};
@@ -4977,7 +5130,7 @@ static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
const APInt &C1 = N1C->getAPIntValue();
const APInt &C3 = N3C->getAPIntValue();
if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
- C1 != C3.zextOrSelf(C1.getBitWidth()))
+ C1 != C3.zext(C1.getBitWidth()))
return SDValue();
unsigned BW = (C1 + 1).exactLogBase2();
@@ -5007,6 +5160,10 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
return C;
+ // If the operands are the same, this is a no-op.
+ if (N0 == N1)
+ return N0;
+
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
@@ -5312,29 +5469,27 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
}
// Turn compare of constants whose difference is 1 bit into add+and+setcc.
- // TODO - support non-uniform vector amounts.
if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
// Match a shared variable operand and 2 non-opaque constant operands.
- ConstantSDNode *C0 = isConstOrConstSplat(LR);
- ConstantSDNode *C1 = isConstOrConstSplat(RR);
- if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
+ auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
+ // The difference of the constants must be a single bit.
const APInt &CMax =
APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
const APInt &CMin =
APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
- // The difference of the constants must be a single bit.
- if ((CMax - CMin).isPowerOf2()) {
- // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
- // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
- SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
- SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
- SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
- SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
- SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
- SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
- SDValue Zero = DAG.getConstant(0, DL, OpVT);
- return DAG.getSetCC(DL, VT, And, Zero, CC0);
- }
+ return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
+ };
+ if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) {
+ // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
+ // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
+ SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
+ SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
+ SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
+ SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
+ SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
+ SDValue Zero = DAG.getConstant(0, DL, OpVT);
+ return DAG.getSetCC(DL, VT, And, Zero, CC0);
}
}
}
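Worked example for the fold above: with X != 12 && X != 8, CMin = 8, CMax = 12, and CMax - CMin = 4 is a single bit, so the pair of compares becomes ((X - 8) & ~4) != 0. A standalone sketch (plain C++, not LLVM code) spot-checking one such constant pair:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t CMin = 8, CMax = 12;
  const uint32_t Mask = ~(CMax - CMin);     // ~4: the difference is one bit
  for (uint32_t x = 0; x < 4096; ++x) {
    bool orig = (x != CMax) && (x != CMin); // and of two setne
    bool folded = ((x - CMin) & Mask) != 0;
    assert(orig == folded);
  }
  return 0;
}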
@@ -5836,6 +5991,9 @@ static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
if (ShiftAmt.uge(VTBitWidth))
return SDValue();
+ if (!TLI.hasBitTest(Srl.getOperand(0), Srl.getOperand(1)))
+ return SDValue();
+
// Turn this into a bit-test pattern using mask op + setcc:
// and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
SDLoc DL(And);
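The hasBitTest guard just moved into this helper; the rewrite it protects is the plain bit-test identity: the low bit of the inverted shift is 1 exactly when bit C of X is clear. A standalone sketch (plain C++, not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < 256; ++x)
    for (unsigned c = 0; c < 8; ++c) {
      uint32_t lhs = (~(x >> c)) & 1;                // and (not (srl X, C)), 1
      uint32_t rhs = ((x & (1u << c)) == 0) ? 1 : 0; // (X & (1<<C)) == 0
      assert(lhs == rhs);
    }
  return 0;
}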
@@ -5882,6 +6040,53 @@ static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask);
}
+/// Given a bitwise logic operation N with a matching bitwise logic operand,
+/// fold a pattern where 2 of the source operands are identically shifted
+/// values. For example:
+/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
+static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
+ SelectionDAG &DAG) {
+ unsigned LogicOpcode = N->getOpcode();
+ assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
+          LogicOpcode == ISD::XOR) &&
+         "Expected bitwise logic operation");
+
+ if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
+ return SDValue();
+
+ // Match another bitwise logic op and a shift.
+ unsigned ShiftOpcode = ShiftOp.getOpcode();
+ if (LogicOp.getOpcode() != LogicOpcode ||
+ !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
+ ShiftOpcode == ISD::SRA))
+ return SDValue();
+
+ // Match another shift op inside the first logic operand. Handle both commuted
+ // possibilities.
+ // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
+ // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
+ SDValue X1 = ShiftOp.getOperand(0);
+ SDValue Y = ShiftOp.getOperand(1);
+ SDValue X0, Z;
+ if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
+ LogicOp.getOperand(0).getOperand(1) == Y) {
+ X0 = LogicOp.getOperand(0).getOperand(0);
+ Z = LogicOp.getOperand(1);
+ } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
+ LogicOp.getOperand(1).getOperand(1) == Y) {
+ X0 = LogicOp.getOperand(1).getOperand(0);
+ Z = LogicOp.getOperand(0);
+ } else {
+ return SDValue();
+ }
+
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
+ SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
+ return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
+}
+
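foldLogicOfShifts relies on shl/srl/sra distributing over the matching bitwise op, which lets the shared shift amount be hoisted. A standalone sketch (plain C++, not LLVM code) spot-checking the OR form quoted in the comment; AND and XOR behave identically:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x0 = 0; x0 < 64; ++x0)
    for (uint32_t x1 = 0; x1 < 64; ++x1)
      for (unsigned y = 0; y < 8; ++y) {
        uint8_t z = uint8_t(x0 * 37 + x1); // arbitrary third operand
        // ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
        uint8_t before = uint8_t((uint8_t(x0 << y) | z) | uint8_t(x1 << y));
        uint8_t after = uint8_t(uint8_t((x0 | x1) << y) | z);
        assert(before == after);
      }
  return 0;
}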
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -5915,27 +6120,25 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
return N0;
- // fold (and (masked_load) (build_vec (x, ...))) to zext_masked_load
+ // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
- auto *BVec = dyn_cast<BuildVectorSDNode>(N1);
- if (MLoad && BVec && MLoad->getExtensionType() == ISD::EXTLOAD &&
- N0.hasOneUse() && N1.hasOneUse()) {
+ ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
+ if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && N0.hasOneUse() &&
+ Splat && N1.hasOneUse()) {
EVT LoadVT = MLoad->getMemoryVT();
EVT ExtVT = VT;
if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
      // For this AND to be a zero extension of the masked load, the splat
      // value must mask the bottom bits of the extended element type.
- if (ConstantSDNode *Splat = BVec->getConstantSplatNode()) {
- uint64_t ElementSize =
- LoadVT.getVectorElementType().getScalarSizeInBits();
- if (Splat->getAPIntValue().isMask(ElementSize)) {
- return DAG.getMaskedLoad(
- ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
- MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
- LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
- ISD::ZEXTLOAD, MLoad->isExpandingLoad());
- }
+ uint64_t ElementSize =
+ LoadVT.getVectorElementType().getScalarSizeInBits();
+ if (Splat->getAPIntValue().isMask(ElementSize)) {
+ return DAG.getMaskedLoad(
+ ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
+ MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
+ LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
+ ISD::ZEXTLOAD, MLoad->isExpandingLoad());
}
}
}
@@ -6011,7 +6214,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// This can be a pure constant or a vector splat, in which case we treat the
// vector as a scalar and use the splat value.
APInt Constant = APInt::getZero(1);
- if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+ if (const ConstantSDNode *C = isConstOrConstSplat(N1)) {
Constant = C->getAPIntValue();
} else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
APInt SplatValue, SplatUndef;
@@ -6151,6 +6354,11 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
return V;
+ if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
+ return R;
+ if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
+ return R;
+
// Masking the negated extension of a boolean is just the zero-extended
// boolean:
// and (sub 0, zext(bool X)), 1 --> zext(bool X)
@@ -6209,9 +6417,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
return Shifts;
- if (TLI.hasBitTest(N0, N1))
- if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
- return V;
+ if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
+ return V;
// Recognize the following pattern:
//
@@ -6261,11 +6468,11 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool LookPassAnd0 = false;
bool LookPassAnd1 = false;
if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
- std::swap(N0, N1);
+ std::swap(N0, N1);
if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
- std::swap(N0, N1);
+ std::swap(N0, N1);
if (N0.getOpcode() == ISD::AND) {
- if (!N0.getNode()->hasOneUse())
+ if (!N0->hasOneUse())
return SDValue();
ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
// Also handle 0xffff since the LHS is guaranteed to have zeros there.
@@ -6278,7 +6485,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
}
if (N1.getOpcode() == ISD::AND) {
- if (!N1.getNode()->hasOneUse())
+ if (!N1->hasOneUse())
return SDValue();
ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
if (!N11C || N11C->getZExtValue() != 0xFF)
@@ -6291,7 +6498,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
std::swap(N0, N1);
if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
return SDValue();
- if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
+ if (!N0->hasOneUse() || !N1->hasOneUse())
return SDValue();
ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
@@ -6304,7 +6511,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
// Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
SDValue N00 = N0->getOperand(0);
if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
- if (!N00.getNode()->hasOneUse())
+ if (!N00->hasOneUse())
return SDValue();
ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
if (!N001C || N001C->getZExtValue() != 0xFF)
@@ -6315,7 +6522,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
SDValue N10 = N1->getOperand(0);
if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
- if (!N10.getNode()->hasOneUse())
+ if (!N10->hasOneUse())
return SDValue();
ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
// Also allow 0xFFFF since the bits will be shifted out. This is needed
@@ -6333,19 +6540,23 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
// Make sure everything beyond the low halfword gets set to zero since the SRL
// 16 will clear the top bits.
unsigned OpSizeInBits = VT.getSizeInBits();
- if (DemandHighBits && OpSizeInBits > 16) {
+ if (OpSizeInBits > 16) {
// If the left-shift isn't masked out then the only way this is a bswap is
// if all bits beyond the low 8 are 0. In that case the entire pattern
// reduces to a left shift anyway: leave it for other parts of the combiner.
- if (!LookPassAnd0)
+ if (DemandHighBits && !LookPassAnd0)
return SDValue();
// However, if the right shift isn't masked out then it might be because
- // it's not needed. See if we can spot that too.
- if (!LookPassAnd1 &&
- !DAG.MaskedValueIsZero(
- N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
- return SDValue();
+ // it's not needed. See if we can spot that too. If the high bits aren't
+ // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
+ // upper bits to be zero.
+ if (!LookPassAnd1) {
+ unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
+ if (!DAG.MaskedValueIsZero(N10,
+ APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
+ return SDValue();
+ }
}
SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
@@ -6365,7 +6576,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
- if (!N.getNode()->hasOneUse())
+ if (!N->hasOneUse())
return false;
unsigned Opc = N.getOpcode();
@@ -6552,8 +6763,9 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
!(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
return SDValue();
- } else
+ } else {
return SDValue();
+ }
// Make sure the parts are all coming from the same node.
if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
@@ -6591,7 +6803,7 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
// (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
// Don't increase # computations.
- (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
+ (N0->hasOneUse() || N1->hasOneUse())) {
// We can only do this xform if we know that bits from X that are set in C2
// but not in C1 are already zero. Likewise for Y.
if (const ConstantSDNode *N0O1C =
@@ -6619,7 +6831,7 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
N1.getOpcode() == ISD::AND &&
N0.getOperand(0) == N1.getOperand(0) &&
// Don't increase # computations.
- (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
+ (N0->hasOneUse() || N1->hasOneUse())) {
SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
N0.getOperand(1), N1.getOperand(1));
return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
@@ -6634,14 +6846,38 @@ static SDValue visitORCommutative(
EVT VT = N0.getValueType();
if (N0.getOpcode() == ISD::AND) {
// fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
- if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
+ // TODO: Set AllowUndefs = true.
+ if (getBitwiseNotOperand(N0.getOperand(1), N0.getOperand(0),
+ /* AllowUndefs */ false) == N1)
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
// fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
- if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
+ if (getBitwiseNotOperand(N0.getOperand(0), N0.getOperand(1),
+ /* AllowUndefs */ false) == N1)
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
}
+ if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
+ return R;
+
+ auto peekThroughZext = [](SDValue V) {
+ if (V->getOpcode() == ISD::ZERO_EXTEND)
+ return V->getOperand(0);
+ return V;
+ };
+
+ // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
+ if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
+ N0.getOperand(0) == N1.getOperand(0) &&
+ peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
+ return N0;
+
+ // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
+ if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
+ N0.getOperand(1) == N1.getOperand(0) &&
+ peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
+ return N0;
+
return SDValue();
}
@@ -6678,11 +6914,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
// fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
- // Do this only if the resulting shuffle is legal.
- if (isa<ShuffleVectorSDNode>(N0) &&
- isa<ShuffleVectorSDNode>(N1) &&
- // Avoid folding a node with illegal type.
- TLI.isTypeLegal(VT)) {
+ // Do this only if the resulting type / shuffle is legal.
+ auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
+ auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
+ if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
@@ -6691,11 +6926,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
- const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
- const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
bool CanFold = true;
int NumElts = VT.getVectorNumElements();
- SmallVector<int, 4> Mask(NumElts);
+ SmallVector<int, 4> Mask(NumElts, -1);
for (int i = 0; i != NumElts; ++i) {
int M0 = SV0->getMaskElt(i);
@@ -6707,10 +6940,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
      // If one element is zero and the other side is undef, keep undef.
// This also handles the case that both are undef.
- if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
- Mask[i] = -1;
+ if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
continue;
- }
// Make sure only one of the elements is zero.
if (M0Zero == M1Zero) {
@@ -6778,7 +7009,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
};
- if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
+ if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
{N1, N0.getOperand(1)})) {
@@ -7098,8 +7329,9 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
// Neg with outer conversions stripped away.
SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
SDValue Neg, SDValue InnerPos,
- SDValue InnerNeg, unsigned PosOpcode,
- unsigned NegOpcode, const SDLoc &DL) {
+ SDValue InnerNeg, bool HasPos,
+ unsigned PosOpcode, unsigned NegOpcode,
+ const SDLoc &DL) {
// fold (or (shl x, (*ext y)),
// (srl x, (*ext (sub 32, y)))) ->
// (rotl x, y) or (rotr x, (sub 32, y))
@@ -7110,7 +7342,6 @@ SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
EVT VT = Shifted.getValueType();
if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
/*IsRotate*/ true)) {
- bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
HasPos ? Pos : Neg);
}
@@ -7126,8 +7357,9 @@ SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
// TODO: Merge with MatchRotatePosNeg.
SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
SDValue Neg, SDValue InnerPos,
- SDValue InnerNeg, unsigned PosOpcode,
- unsigned NegOpcode, const SDLoc &DL) {
+ SDValue InnerNeg, bool HasPos,
+ unsigned PosOpcode, unsigned NegOpcode,
+ const SDLoc &DL) {
EVT VT = N0.getValueType();
unsigned EltBits = VT.getScalarSizeInBits();
@@ -7139,7 +7371,6 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
// (srl x1, (*ext y))) ->
// (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
- bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
HasPos ? Pos : Neg);
}
@@ -7201,6 +7432,16 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
bool HasROTR = hasOperation(ISD::ROTR, VT);
bool HasFSHL = hasOperation(ISD::FSHL, VT);
bool HasFSHR = hasOperation(ISD::FSHR, VT);
+
+ // If the type is going to be promoted and the target has enabled custom
+ // lowering for rotate, allow matching rotate by non-constants. Only allow
+ // this for scalar types.
+ if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
+ TargetLowering::TypePromoteInteger) {
+ HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom;
+ HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom;
+ }
+
if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
return SDValue();
@@ -7254,11 +7495,6 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
if (LHSShift.getOpcode() == RHSShift.getOpcode())
return SDValue(); // Shifts must disagree.
- // TODO: Support pre-legalization funnel-shift by constant.
- bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
- if (!IsRotate && !(HasFSHL || HasFSHR))
- return SDValue(); // Requires funnel shift support.
-
// Canonicalize shl to left side in a shl/srl pair.
if (RHSShift.getOpcode() == ISD::SHL) {
std::swap(LHS, RHS);
@@ -7272,27 +7508,12 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
SDValue RHSShiftArg = RHSShift.getOperand(0);
SDValue RHSShiftAmt = RHSShift.getOperand(1);
- // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
- // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
- // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
- // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
- // iff C1+C2 == EltSizeInBits
auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
ConstantSDNode *RHS) {
return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
};
- if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
- SDValue Res;
- if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
- bool UseROTL = !LegalOperations || HasROTL;
- Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
- UseROTL ? LHSShiftAmt : RHSShiftAmt);
- } else {
- bool UseFSHL = !LegalOperations || HasFSHL;
- Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
- RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
- }
+ auto ApplyMasks = [&](SDValue Res) {
// If there is an AND of either shifted operand, apply it to the result.
if (LHSMask.getNode() || RHSMask.getNode()) {
SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
@@ -7313,6 +7534,71 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
}
return Res;
+ };
+
+ // TODO: Support pre-legalization funnel-shift by constant.
+ bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
+ if (!IsRotate && !(HasFSHL || HasFSHR)) {
+ if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
+ ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
+ // Look for a disguised rotate by constant.
+ // The common shifted operand X may be hidden inside another 'or'.
+ SDValue X, Y;
+ auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
+ if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
+ return false;
+ if (CommonOp == Or.getOperand(0)) {
+ X = CommonOp;
+ Y = Or.getOperand(1);
+ return true;
+ }
+ if (CommonOp == Or.getOperand(1)) {
+ X = CommonOp;
+ Y = Or.getOperand(0);
+ return true;
+ }
+ return false;
+ };
+
+ SDValue Res;
+ if (matchOr(LHSShiftArg, RHSShiftArg)) {
+ // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
+ SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
+ SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
+ Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
+ } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
+ // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
+ SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
+ SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
+ Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
+ } else {
+ return SDValue();
+ }
+
+ return ApplyMasks(Res);
+ }
+
+ return SDValue(); // Requires funnel shift support.
+ }
+
+ // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
+ // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
+ // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
+ // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
+ // iff C1+C2 == EltSizeInBits
+ if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
+ SDValue Res;
+ if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
+ bool UseROTL = !LegalOperations || HasROTL;
+ Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
+ UseROTL ? LHSShiftAmt : RHSShiftAmt);
+ } else {
+ bool UseFSHL = !LegalOperations || HasFSHL;
+ Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
+ RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
+ }
+
+ return ApplyMasks(Res);
}
// Even pre-legalization, we can't easily rotate/funnel-shift by a variable
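The disguised-rotate match above splits the extra operand out of the shared 'or': shl(X | Y, C1) is (X << C1) | (Y << C1), and once C1 + C2 == BW the X terms recombine into a rotate. A standalone sketch (plain C++, not LLVM code; rotl8 is an illustrative helper) for the first pattern at 8 bits:

#include <cassert>
#include <cstdint>

uint8_t rotl8(uint8_t x, unsigned c) {
  return uint8_t((x << c) | (x >> ((8 - c) & 7)));
}

int main() {
  const unsigned C1 = 3, C2 = 5; // C1 + C2 == 8
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 0; y < 256; ++y) {
      // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
      uint8_t before = uint8_t(uint8_t((x | y) << C1) | uint8_t(x >> C2));
      uint8_t after = uint8_t(rotl8(uint8_t(x), C1) | uint8_t(y << C1));
      assert(before == after);
    }
  return 0;
}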
@@ -7343,26 +7629,26 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
if (IsRotate && (HasROTL || HasROTR)) {
SDValue TryL =
MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
- RExtOp0, ISD::ROTL, ISD::ROTR, DL);
+ RExtOp0, HasROTL, ISD::ROTL, ISD::ROTR, DL);
if (TryL)
return TryL;
SDValue TryR =
MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
- LExtOp0, ISD::ROTR, ISD::ROTL, DL);
+ LExtOp0, HasROTR, ISD::ROTR, ISD::ROTL, DL);
if (TryR)
return TryR;
}
SDValue TryL =
MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
- LExtOp0, RExtOp0, ISD::FSHL, ISD::FSHR, DL);
+ LExtOp0, RExtOp0, HasFSHL, ISD::FSHL, ISD::FSHR, DL);
if (TryL)
return TryL;
SDValue TryR =
MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
- RExtOp0, LExtOp0, ISD::FSHR, ISD::FSHL, DL);
+ RExtOp0, LExtOp0, HasFSHR, ISD::FSHR, ISD::FSHL, DL);
if (TryR)
return TryR;
@@ -7877,7 +8163,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// little endian value load
Optional<bool> IsBigEndian = isBigEndian(
makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
- if (!IsBigEndian.hasValue())
+ if (!IsBigEndian)
return SDValue();
assert(FirstByteProvider && "must be set");
@@ -8084,6 +8370,13 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
return RXOR;
+ // look for 'add-like' folds:
+ // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
+ isMinSignedConstant(N1))
+ if (SDValue Combined = visitADDLike(N))
+ return Combined;
+
// fold !(x cc y) -> (x !cc y)
unsigned N0Opcode = N0.getOpcode();
SDValue LHS, RHS, CC;
@@ -8249,6 +8542,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
return V;
+ if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
+ return R;
+ if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
+ return R;
+
// Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
if (SDValue MM = unfoldMaskedMerge(N))
return MM;
@@ -8479,7 +8777,9 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
}
unsigned NextOp = N0.getOpcode();
- // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
+
+ // fold (rot* (rot* x, c2), c1)
+ // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize)) % bitsize)
if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
@@ -8487,14 +8787,19 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
EVT ShiftVT = C1->getValueType(0);
bool SameSide = (N->getOpcode() == NextOp);
unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
- if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
- CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) {
- SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
- SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
- ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC});
- return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
- CombinedShiftNorm);
- }
+ SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
+ SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
+ {N1, BitsizeC});
+ SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
+ {N0.getOperand(1), BitsizeC});
+ if (Norm1 && Norm2)
+ if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
+ CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
+ SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
+ ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
+ return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
+ CombinedShiftNorm);
+ }
}
}
return SDValue();
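Normalizing each amount with UREM before combining keeps every intermediate in [0, bitsize), where the old SREM-based version could go negative: rotl (rotr x, 3), 1 on i8 gives 1 - 3 = -2, and -2 srem 8 is still -2, an invalid amount, while the i8 constant -2 is 254 unsigned and 254 urem 8 = 6, the correct combined rotate. A standalone sketch (plain C++, not LLVM code; rotl8/rotr8 are illustrative helpers):

#include <cassert>
#include <cstdint>

uint8_t rotl8(uint8_t x, unsigned c) {
  c &= 7;
  return uint8_t((x << c) | (x >> ((8 - c) & 7)));
}
uint8_t rotr8(uint8_t x, unsigned c) { return rotl8(x, (8 - (c & 7)) & 7); }

int main() {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned c1 = 0; c1 < 16; ++c1)
      for (unsigned c2 = 0; c2 < 16; ++c2) {
        // Opposite directions combine with SUB; unsigned wraparound plus a
        // final UREM yields a valid in-range amount.
        unsigned combined = uint8_t((c1 % 8) - (c2 % 8)) % 8;
        assert(rotl8(rotr8(uint8_t(x), c2), c1) ==
               rotl8(uint8_t(x), combined));
      }
  return 0;
}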
@@ -8654,52 +8959,63 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
}
}
- // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
- // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
- // TODO - support non-uniform vector shift amounts.
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
- if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
- N0->getFlags().hasExact()) {
- if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
- uint64_t C1 = N0C1->getZExtValue();
- uint64_t C2 = N1C->getZExtValue();
- SDLoc DL(N);
- if (C1 <= C2)
- return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
- DAG.getConstant(C2 - C1, DL, ShiftVT));
- return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
- DAG.getConstant(C1 - C2, DL, ShiftVT));
+ if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
+ auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ const APInt &LHSC = LHS->getAPIntValue();
+ const APInt &RHSC = RHS->getAPIntValue();
+ return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
+ LHSC.getZExtValue() <= RHSC.getZExtValue();
+ };
+
+ SDLoc DL(N);
+
+ // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
+    // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C1-C2)) if C1 >= C2
+ if (N0->getFlags().hasExact()) {
+ if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
+ return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
+ }
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
+ return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
+ }
}
- }
- // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
- // (and (srl x, (sub c1, c2), MASK)
- // Only fold this if the inner shift has no other uses -- if it does, folding
- // this will increase the total number of instructions.
- // TODO - drop hasOneUse requirement if c1 == c2?
- // TODO - support non-uniform vector shift amounts.
- if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
- TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
- if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
- if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
- uint64_t c1 = N0C1->getZExtValue();
- uint64_t c2 = N1C->getZExtValue();
- APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
- SDValue Shift;
- if (c2 > c1) {
- Mask <<= c2 - c1;
- SDLoc DL(N);
- Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
- DAG.getConstant(c2 - c1, DL, ShiftVT));
- } else {
- Mask.lshrInPlace(c1 - c2);
- SDLoc DL(N);
- Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
- DAG.getConstant(c1 - c2, DL, ShiftVT));
- }
- SDLoc DL(N0);
- return DAG.getNode(ISD::AND, DL, VT, Shift,
- DAG.getConstant(Mask, DL, VT));
+    // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
+    //                               (and (srl x, (sub c1, c2)), MASK)
+ // Only fold this if the inner shift has no other uses -- if it does,
+ // folding this will increase the total number of instructions.
+ if (N0.getOpcode() == ISD::SRL &&
+ (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
+ TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
+ SDValue Mask = DAG.getAllOnesConstant(DL, VT);
+ Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
+ Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
+ return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
+ }
+ if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
+ SDValue Mask = DAG.getAllOnesConstant(DL, VT);
+ Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
+ SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
+ return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
}
}
}
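
The two branches above turn (shl (srl x, c1), c2) into a single shift plus an AND, building MASK out of shift nodes so non-uniform vector amounts work too. A standalone check of the scalar identity (plain C++, not LLVM code; the mask expressions mirror the Mask computation above):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0x9E3779B9u;
  for (unsigned C1 = 0; C1 < 32; ++C1)
    for (unsigned C2 = 0; C2 < 32; ++C2) {
      uint32_t Expected = (X >> C1) << C2;
      uint32_t Got;
      if (C2 >= C1) {
        // Shift left by the difference, then clear the low C2 bits.
        Got = (X << (C2 - C1)) & (~0u << C2);
      } else {
        // Shift right by the difference; the realigned mask again clears
        // the low C2 bits of the result.
        Got = (X >> (C1 - C2)) & ((~0u << C1) >> (C1 - C2));
      }
      assert(Got == Expected);
    }
  return 0;
}
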
@@ -8718,7 +9034,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// Variant of version done on multiply, except mul by a power of 2 is turned
// into a shift.
if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
- N0.getNode()->hasOneUse() &&
+ N0->hasOneUse() &&
isConstantOrConstantVector(N1, /* No Opaques */ true) &&
isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
TLI.isDesirableToCommuteWithShift(N, Level)) {
@@ -8730,14 +9046,14 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
}
// fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
- if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
- isConstantOrConstantVector(N1, /* No Opaques */ true) &&
- isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
- SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
- if (isConstantOrConstantVector(Shl))
+ if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
+ SDValue N01 = N0.getOperand(1);
+ if (SDValue Shl =
+ DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
}
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N1C && !N1C->isOpaque())
if (SDValue NewSHL = visitShiftByConstant(N))
return NewSHL;
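
For reference, the (shl (mul x, c1), c2) -> (mul x, c1 << c2) fold above relies on a left shift being multiplication by a power of two under wrap-around arithmetic. A quick standalone check (plain C++):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xCAFEBABEu, C1 = 0x01234567u;
  for (unsigned C2 = 0; C2 < 32; ++C2)
    // Both sides are computed modulo 2^32, so they agree for every C2.
    assert(((X * C1) << C2) == X * (C1 << C2));
  return 0;
}
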
@@ -9023,8 +9339,10 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
if (LargeShift->getAPIntValue() == TruncBits) {
SDLoc DL(N);
- SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
- getShiftAmountTy(LargeVT));
+ EVT LargeShiftVT = getShiftAmountTy(LargeVT);
+ SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
+ Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
+ DAG.getConstant(TruncBits, DL, LargeShiftVT));
SDValue SRA =
DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
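
The rewrite above now builds the wide shift amount as zext(N1) plus TruncBits instead of requiring a uniform constant, performing one arithmetic shift in the wide type before truncating. A standalone illustration of why that matches shifting after the truncate (plain C++; assumes the usual arithmetic behaviour of >> on negative signed values, which mainstream compilers provide):

#include <cassert>
#include <cstdint>

int main() {
  int64_t X = INT64_C(-0x0123456789ABCDEF);
  for (unsigned C = 0; C < 32; ++C) {
    int32_t Narrow = (int32_t)(X >> 32) >> C; // truncate, then narrow sra
    int32_t Wide = (int32_t)(X >> (32 + C));  // one wide sra, then truncate
    assert(Narrow == Wide);
  }
  return 0;
}
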
@@ -9063,6 +9381,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return V;
EVT VT = N0.getValueType();
+ EVT ShiftVT = N1.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
// fold (srl c1, c2) -> c1 >>u c2
@@ -9104,7 +9423,6 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
};
if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
SDLoc DL(N);
- EVT ShiftVT = N1.getValueType();
SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
}
@@ -9148,15 +9466,41 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
}
}
- // fold (srl (shl x, c), c) -> (and x, cst2)
- // TODO - (srl (shl x, c1), c2).
- if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
- isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
- SDLoc DL(N);
- SDValue Mask =
- DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
- AddToWorklist(Mask.getNode());
- return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
+  // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2)), MASK) or
+  //                               (and (srl x, (sub c2, c1)), MASK)
+ if (N0.getOpcode() == ISD::SHL &&
+ (N0.getOperand(1) == N1 || N0->hasOneUse()) &&
+ TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
+ auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ const APInt &LHSC = LHS->getAPIntValue();
+ const APInt &RHSC = RHS->getAPIntValue();
+ return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
+ LHSC.getZExtValue() <= RHSC.getZExtValue();
+ };
+ if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDLoc DL(N);
+ SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
+ SDValue Mask = DAG.getAllOnesConstant(DL, VT);
+ Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
+ Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
+ SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
+ return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
+ }
+ if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDLoc DL(N);
+ SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
+ SDValue Mask = DAG.getAllOnesConstant(DL, VT);
+ Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
+ return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
+ }
}
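
This is the mirror image of the shl-of-srl fold in visitSHL above: (srl (shl x, c1), c2) also collapses to a single shift by the difference plus a mask. A standalone check of the scalar identity (plain C++, not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0x2545F491u;
  for (unsigned C1 = 0; C1 < 32; ++C1)
    for (unsigned C2 = 0; C2 < 32; ++C2) {
      uint32_t Expected = (X << C1) >> C2;
      uint32_t Got;
      if (C2 <= C1) {
        // Shift left by the difference; the mask keeps bits [C1-C2, 31-C2].
        Got = (X << (C1 - C2)) & ((~0u >> C1) << (C1 - C2));
      } else {
        // Shift right by the difference, then clear the high C2 bits.
        Got = (X >> (C2 - C1)) & (~0u >> C2);
      }
      assert(Got == Expected);
    }
  return 0;
}
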
// fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
@@ -9412,6 +9756,21 @@ SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0, N1}))
return C;
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+
+ if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
+ // fold (sshlsat x, c) -> (shl x, c)
+ if (N->getOpcode() == ISD::SSHLSAT && N1C &&
+ N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1);
+
+ // fold (ushlsat x, c) -> (shl x, c)
+ if (N->getOpcode() == ISD::USHLSAT && N1C &&
+ N1C->getAPIntValue().ule(
+ DAG.computeKnownBits(N0).countMinLeadingZeros()))
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1);
+ }
+
return SDValue();
}
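
The new visitSHLSAT folds above drop the saturation when overflow is provably impossible: the signed form needs the amount to be strictly below ComputeNumSignBits(N0), and the unsigned form needs it to fit within the known leading zeros. A standalone sketch of the signed case on i8 (plain C++; sshlsat8 and numSignBits8 are hypothetical stand-ins for the ISD::SSHLSAT semantics and ComputeNumSignBits, not LLVM APIs):

#include <cassert>
#include <cstdint>

// Signed saturating shift-left on i8, computed in a wide type.
static int8_t sshlsat8(int8_t X, unsigned C) {
  int32_t Wide = (int32_t)X * (1 << C); // widened shift, no overflow
  if (Wide > INT8_MAX) return INT8_MAX;
  if (Wide < INT8_MIN) return INT8_MIN;
  return (int8_t)Wide;
}

// Number of leading bits equal to the sign bit, including the sign bit.
static unsigned numSignBits8(int8_t X) {
  unsigned N = 1;
  while (N < 8 && (((X >> (7 - N)) ^ (X >> 7)) & 1) == 0)
    ++N;
  return N;
}

int main() {
  for (int V = -128; V <= 127; ++V)
    for (unsigned C = 0; C < 8; ++C)
      if (C < numSignBits8((int8_t)V)) // the guard used in the fold
        assert(sshlsat8((int8_t)V, C) == (int8_t)(V * (1 << C)));
  return 0;
}
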
@@ -9435,18 +9794,27 @@ static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG,
(Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND))
return SDValue();
+ EVT VT = N->getValueType(0);
EVT VT1 = Op0.getOperand(0).getValueType();
EVT VT2 = Op1.getOperand(0).getValueType();
- // Check if the operands are of same type and valid size.
unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU;
- if (VT1 != VT2 || !TLI.isOperationLegalOrCustom(ABDOpcode, VT1))
- return SDValue();
- Op0 = Op0.getOperand(0);
- Op1 = Op1.getOperand(0);
- SDValue ABD =
- DAG.getNode(ABDOpcode, SDLoc(N), Op0->getValueType(0), Op0, Op1);
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), ABD);
+ // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
+ // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
+ // NOTE: Extensions must be equivalent.
+ if (VT1 == VT2 && TLI.isOperationLegalOrCustom(ABDOpcode, VT1)) {
+ Op0 = Op0.getOperand(0);
+ Op1 = Op1.getOperand(0);
+ SDValue ABD = DAG.getNode(ABDOpcode, SDLoc(N), VT1, Op0, Op1);
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, ABD);
+ }
+
+ // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
+ // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
+ if (TLI.isOperationLegalOrCustom(ABDOpcode, VT))
+ return DAG.getNode(ABDOpcode, SDLoc(N), VT, Op0, Op1);
+
+ return SDValue();
}
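
combineABSToABD above now has two tiers: prefer the narrow absolute-difference node plus a zero extension when legal, otherwise form the abd directly in the wide type. A standalone exhaustive check of the narrow i8 signed case (plain C++; abds8 is a hypothetical stand-in for the ISD::ABDS semantics):

#include <cassert>
#include <cstdint>
#include <cstdlib>

// i8 signed absolute difference; the result is the unsigned 8-bit
// magnitude, which is what ABDS produces.
static uint8_t abds8(int8_t A, int8_t B) {
  int32_t D = (int32_t)A - (int32_t)B;
  return (uint8_t)(D < 0 ? -D : D);
}

int main() {
  for (int A = -128; A <= 127; ++A)
    for (int B = -128; B <= 127; ++B) {
      int32_t AbsOfExt = std::abs(A - B);               // abs(sext - sext)
      uint32_t ZextOfAbd = abds8((int8_t)A, (int8_t)B); // zext(abds(x, y))
      assert((uint32_t)AbsOfExt == ZextOfAbd);
    }
  return 0;
}
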
SDValue DAGCombiner::visitABS(SDNode *N) {
@@ -9472,24 +9840,60 @@ SDValue DAGCombiner::visitABS(SDNode *N) {
SDValue DAGCombiner::visitBSWAP(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
// fold (bswap c1) -> c2
if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
- return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
+ return DAG.getNode(ISD::BSWAP, DL, VT, N0);
// fold (bswap (bswap x)) -> x
if (N0.getOpcode() == ISD::BSWAP)
- return N0->getOperand(0);
+ return N0.getOperand(0);
// Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
// isn't supported, it will be expanded to bswap followed by a manual reversal
// of bits in each byte. By placing bswaps before bitreverse, we can remove
// the two bswaps if the bitreverse gets expanded.
if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
- SDLoc DL(N);
SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
}
+ // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
+  // iff c >= bw/2 (i.e. lower half of the shift result is known zero)
+ unsigned BW = VT.getScalarSizeInBits();
+ if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
+ auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
+ if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
+ ShAmt->getZExtValue() >= (BW / 2) &&
+ (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
+ TLI.isTruncateFree(VT, HalfVT) &&
+ (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
+ SDValue Res = N0.getOperand(0);
+ if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
+ Res = DAG.getNode(ISD::SHL, DL, VT, Res,
+ DAG.getConstant(NewShAmt, DL, getShiftAmountTy(VT)));
+ Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
+ Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
+ return DAG.getZExtOrTrunc(Res, DL, VT);
+ }
+ }
+
+ // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
+ // inverse-shift-of-bswap:
+ // bswap (X u<< C) --> (bswap X) u>> C
+ // bswap (X u>> C) --> (bswap X) u<< C
+ if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
+ N0.hasOneUse()) {
+ auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
+ ShAmt->getZExtValue() % 8 == 0) {
+ SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
+ unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
+ return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
+ }
+ }
+
return SDValue();
}
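
Both new visitBSWAP folds above depend on a byte swap commuting with byte-granular shifts. A standalone check of the shift canonicalization and the half-width fold (plain C++; bswap32 is a local helper, not an LLVM API):

#include <cassert>
#include <cstdint>

static uint32_t bswap32(uint32_t X) {
  return (X >> 24) | ((X >> 8) & 0xFF00u) | ((X << 8) & 0xFF0000u) |
         (X << 24);
}

int main() {
  uint32_t X = 0xA1B2C3D4u;
  // bswap (x u<< C) == (bswap x) u>> C (and vice versa) when C % 8 == 0.
  for (unsigned C = 0; C < 32; C += 8) {
    assert(bswap32(X << C) == (bswap32(X) >> C));
    assert(bswap32(X >> C) == (bswap32(X) << C));
  }
  // Half-width fold: when the low half is known zero, swapping the high
  // half in the narrow type and zero extending gives the full swap.
  uint32_t Hi = X << 16;
  uint16_t HiHalf = (uint16_t)(Hi >> 16);
  uint16_t Swapped16 = (uint16_t)((HiHalf << 8) | (HiHalf >> 8));
  assert(bswap32(Hi) == (uint32_t)Swapped16);
  return 0;
}
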
@@ -9740,7 +10144,8 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
if (C1Val.isPowerOf2() && C2Val.isZero()) {
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
- SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
+ SDValue ShAmtC =
+ DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
}
@@ -10023,7 +10428,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
// Any flags available in a select/setcc fold will be on the setcc as they
// migrated from fcmp
- Flags = N0.getNode()->getFlags();
+ Flags = N0->getFlags();
SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
N2, N0.getOperand(2));
SelectNode->setFlags(Flags);
@@ -10096,14 +10501,19 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
}
-bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
+bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
+ SelectionDAG &DAG) {
if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD)
return false;
+ // Only perform the transformation when existing operands can be reused.
+ if (IndexIsScaled)
+ return false;
+
// For now we check only the LHS of the add.
SDValue LHS = Index.getOperand(0);
SDValue SplatVal = DAG.getSplatValue(LHS);
- if (!SplatVal)
+ if (!SplatVal || SplatVal.getValueType() != BasePtr.getValueType())
return false;
BasePtr = SplatVal;
@@ -10112,23 +10522,29 @@ bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
}
// Fold sext/zext of index into index type.
-bool refineIndexType(MaskedGatherScatterSDNode *MGS, SDValue &Index,
- bool Scaled, SelectionDAG &DAG) {
+bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
+ SelectionDAG &DAG) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ // It's always safe to look through zero extends.
if (Index.getOpcode() == ISD::ZERO_EXTEND) {
SDValue Op = Index.getOperand(0);
- MGS->setIndexType(Scaled ? ISD::UNSIGNED_SCALED : ISD::UNSIGNED_UNSCALED);
- if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
+ if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType(), DataVT)) {
+ IndexType = ISD::UNSIGNED_SCALED;
Index = Op;
return true;
}
+ if (ISD::isIndexTypeSigned(IndexType)) {
+ IndexType = ISD::UNSIGNED_SCALED;
+ return true;
+ }
}
- if (Index.getOpcode() == ISD::SIGN_EXTEND) {
+ // It's only safe to look through sign extends when Index is signed.
+ if (Index.getOpcode() == ISD::SIGN_EXTEND &&
+ ISD::isIndexTypeSigned(IndexType)) {
SDValue Op = Index.getOperand(0);
- MGS->setIndexType(Scaled ? ISD::SIGNED_SCALED : ISD::SIGNED_UNSCALED);
- if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
+ if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType(), DataVT)) {
Index = Op;
return true;
}
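
A small standalone illustration (plain C++) of why refineIndexType above only looks through SIGN_EXTEND when the index type is already signed, while ZERO_EXTEND can additionally switch the index type to unsigned:

#include <cassert>
#include <cstdint>

int main() {
  // A sign-extended negative index only keeps its meaning under a signed
  // index type; the unsigned reinterpretation addresses a different slot.
  int8_t Narrow = -1;
  assert((int64_t)Narrow != (int64_t)(uint8_t)Narrow); // -1 vs 255
  // A zero-extended index is non-negative, so it reads the same whether
  // the index type is treated as signed or unsigned.
  uint8_t NarrowU = 200;
  assert((int64_t)NarrowU == (int64_t)(uint64_t)NarrowU);
  return 0;
}
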
@@ -10145,24 +10561,25 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
SDValue Scale = MSC->getScale();
SDValue StoreVal = MSC->getValue();
SDValue BasePtr = MSC->getBasePtr();
+ ISD::MemIndexType IndexType = MSC->getIndexType();
SDLoc DL(N);
// Zap scatters with a zero mask.
if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
return Chain;
- if (refineUniformBase(BasePtr, Index, DAG)) {
+ if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG)) {
SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
- return DAG.getMaskedScatter(
- DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
- MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
+ return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
+ DL, Ops, MSC->getMemOperand(), IndexType,
+ MSC->isTruncatingStore());
}
- if (refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG)) {
+ if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
- return DAG.getMaskedScatter(
- DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
- MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
+ return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
+ DL, Ops, MSC->getMemOperand(), IndexType,
+ MSC->isTruncatingStore());
}
return SDValue();
@@ -10217,7 +10634,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
// If this is a TRUNC followed by a masked store, fold this into a masked
// truncating store. We can do this even if this is already a masked
// truncstore.
- if ((Value.getOpcode() == ISD::TRUNCATE) && Value.getNode()->hasOneUse() &&
+ if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
MST->isUnindexed() &&
TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
MST->getMemoryVT(), LegalOperations)) {
@@ -10240,26 +10657,25 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {
SDValue Scale = MGT->getScale();
SDValue PassThru = MGT->getPassThru();
SDValue BasePtr = MGT->getBasePtr();
+ ISD::MemIndexType IndexType = MGT->getIndexType();
SDLoc DL(N);
// Zap gathers with a zero mask.
if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
return CombineTo(N, PassThru, MGT->getChain());
- if (refineUniformBase(BasePtr, Index, DAG)) {
+ if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG)) {
SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
- return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
- MGT->getMemoryVT(), DL, Ops,
- MGT->getMemOperand(), MGT->getIndexType(),
- MGT->getExtensionType());
+ return DAG.getMaskedGather(
+ DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
+ Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
}
- if (refineIndexType(MGT, Index, MGT->isIndexScaled(), DAG)) {
+ if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
- return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
- MGT->getMemoryVT(), DL, Ops,
- MGT->getMemOperand(), MGT->getIndexType(),
- MGT->getExtensionType());
+ return DAG.getMaskedGather(
+ DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
+ Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
}
return SDValue();
@@ -10513,23 +10929,25 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
Other = N1;
}
+ // zext(x) >= y ? trunc(zext(x) - y) : 0
+ // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
+ // zext(x) > y ? trunc(zext(x) - y) : 0
+ // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
+ if (Other && Other.getOpcode() == ISD::TRUNCATE &&
+ Other.getOperand(0).getOpcode() == ISD::SUB &&
+ (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
+ SDValue OpLHS = Other.getOperand(0).getOperand(0);
+ SDValue OpRHS = Other.getOperand(0).getOperand(1);
+ if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
+ if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS,
+ DAG, DL))
+ return R;
+ }
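
The new pattern above recognizes an unsigned saturated subtract written as a compare and select over a zero-extended operand, with the wide RHS clamped to the narrow type's saturation limit. A standalone exhaustive check over i8 values (plain C++; usubsat8 is a hypothetical stand-in for ISD::USUBSAT):

#include <cassert>
#include <cstdint>

static uint8_t usubsat8(uint8_t A, uint8_t B) { return A > B ? A - B : 0; }

int main() {
  for (unsigned X = 0; X <= 255; ++X)      // X stands for zext(x)
    for (unsigned Y = 0; Y <= 1023; ++Y) { // Y is the wide operand
      uint8_t SelectForm = (X >= Y) ? (uint8_t)(X - Y) : 0;
      uint8_t ClampedY = (uint8_t)(Y < 255 ? Y : 255); // umin(y, SatLimit)
      assert(SelectForm == usubsat8((uint8_t)X, ClampedY));
      // SETUGT matches as well: when X == Y, both forms yield 0 anyway.
      uint8_t StrictForm = (X > Y) ? (uint8_t)(X - Y) : 0;
      assert(StrictForm == SelectForm);
    }
  return 0;
}
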
+
if (Other && Other.getNumOperands() == 2) {
SDValue CondRHS = RHS;
SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
- if (Other.getOpcode() == ISD::SUB &&
- LHS.getOpcode() == ISD::ZERO_EXTEND && LHS.getOperand(0) == OpLHS &&
- OpRHS.getOpcode() == ISD::TRUNCATE && OpRHS.getOperand(0) == RHS) {
- // Look for a general sub with unsigned saturation first.
- // zext(x) >= y ? x - trunc(y) : 0
- // --> usubsat(x,trunc(umin(y,SatLimit)))
- // zext(x) > y ? x - trunc(y) : 0
- // --> usubsat(x,trunc(umin(y,SatLimit)))
- if (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)
- return getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS, DAG,
- DL);
- }
-
if (OpLHS == LHS) {
// Look for a general sub with unsigned saturation first.
// x >= y ? x-y : 0 --> usubsat x, y
@@ -10560,8 +10978,8 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
// Another special case: If C was a sign bit, the sub has been
// canonicalized into a xor.
- // FIXME: Would it be better to use computeKnownBits to determine
- // whether it's safe to decanonicalize the xor?
+ // FIXME: Would it be better to use computeKnownBits to
+ // determine whether it's safe to decanonicalize the xor?
// x s< 0 ? x^C : 0 --> usubsat x, C
APInt SplatValue;
if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
@@ -10627,17 +11045,18 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
CC, SDLoc(N), false)) {
AddToWorklist(SCC.getNode());
- if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
- if (!SCCC->isZero())
- return N2; // cond always true -> true val
- else
- return N3; // cond always false -> false val
- } else if (SCC->isUndef()) {
- // When the condition is UNDEF, just return the first operand. This is
- // coherent the DAG creation, no setcc node is created in this case
+ // cond always true -> true val
+ // cond always false -> false val
+ if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
+ return SCCC->isZero() ? N3 : N2;
+
+      // When the condition is UNDEF, just return the first operand. This is
+      // coherent with DAG creation: no setcc node is created in this case.
+ if (SCC->isUndef())
return N2;
- } else if (SCC.getOpcode() == ISD::SETCC) {
- // Fold to a simpler select_cc
+
+ // Fold to a simpler select_cc
+ if (SCC.getOpcode() == ISD::SETCC) {
SDValue SelectOp = DAG.getNode(
ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
SCC.getOperand(1), N2, N3, SCC.getOperand(2));
@@ -10920,9 +11339,8 @@ static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
const TargetLowering &TLI) {
bool HasCopyToRegUses = false;
bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
- for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
- UE = N0.getNode()->use_end();
- UI != UE; ++UI) {
+ for (SDNode::use_iterator UI = N0->use_begin(), UE = N0->use_end(); UI != UE;
+ ++UI) {
SDNode *User = *UI;
if (User == N)
continue;
@@ -11254,9 +11672,12 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
bool LegalOperations, SDNode *N, SDValue N0,
ISD::LoadExtType ExtLoadType,
ISD::NodeType ExtOpc) {
+  // TODO: isFixedLengthVector() should be removed, with any negative effects
+  // on code generation addressed via the target's implementation of
+  // isVectorLoadExtDesirable().
if (!ISD::isNON_EXTLoad(N0.getNode()) ||
!ISD::isUNINDEXEDLoad(N0.getNode()) ||
- ((LegalOperations || VT.isVector() ||
+ ((LegalOperations || VT.isFixedLengthVector() ||
!cast<LoadSDNode>(N0)->isSimple()) &&
!TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
return {};
@@ -11480,6 +11901,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
+  // sext(undef) = 0 because the top bits will all be the same.
+ if (N0.isUndef())
+ return DAG.getConstant(0, DL, VT);
+
if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
return Res;
@@ -11649,10 +12074,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// Return SDValue here as the xor should have already been replaced in
// this sext.
return SDValue();
- } else {
- // Return a new sext with the new xor.
- return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
}
+
+ // Return a new sext with the new xor.
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
}
SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
@@ -11725,6 +12150,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ // zext(undef) = 0
+ if (N0.isUndef())
+ return DAG.getConstant(0, SDLoc(N), VT);
+
if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
return Res;
@@ -11984,6 +12413,10 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ // aext(undef) = undef
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
return Res;
@@ -12021,11 +12454,10 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
N0.getValueType())) {
SDLoc DL(N);
- SDValue X = N0.getOperand(0).getOperand(0);
- X = DAG.getAnyExtOrTrunc(X, DL, VT);
- APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
- return DAG.getNode(ISD::AND, DL, VT,
- X, DAG.getConstant(Mask, DL, VT));
+ SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
+ SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
+ assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
+ return DAG.getNode(ISD::AND, DL, VT, X, Y);
}
// fold (aext (load x)) -> (aext (truncate (extload x)))
@@ -12153,13 +12585,9 @@ SDValue DAGCombiner::visitAssertExt(SDNode *N) {
// This eliminates the later assert:
// assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
// assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
+ SDLoc DL(N);
SDValue BigA = N0.getOperand(0);
EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
- assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
- "Asserting zero/sign-extended bits to a type larger than the "
- "truncated destination does not provide information");
-
- SDLoc DL(N);
EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
@@ -12175,10 +12603,6 @@ SDValue DAGCombiner::visitAssertExt(SDNode *N) {
Opcode == ISD::AssertZext) {
SDValue BigA = N0.getOperand(0);
EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
- assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
- "Asserting zero/sign-extended bits to a type larger than the "
- "truncated destination does not provide information");
-
if (AssertVT.bitsLT(BigA_AssertVT)) {
SDLoc DL(N);
SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
@@ -12296,13 +12720,11 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
unsigned ActiveBits = 0;
if (Mask.isMask()) {
ActiveBits = Mask.countTrailingOnes();
- } else if (Mask.isShiftedMask()) {
- ShAmt = Mask.countTrailingZeros();
- APInt ShiftedMask = Mask.lshr(ShAmt);
- ActiveBits = ShiftedMask.countTrailingOnes();
+ } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
HasShiftedOffset = true;
- } else
+ } else {
return SDValue();
+ }
ExtType = ISD::ZEXTLOAD;
ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
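
The APInt::isShiftedMask(ShAmt, ActiveBits) overload used above reports the run's start bit and length in one call, replacing the manual countTrailingZeros/lshr/countTrailingOnes sequence. A standalone sketch of the same decomposition (plain C++; uses GCC/Clang bit-counting builtins):

#include <cassert>
#include <cstdint>

// Decompose a shifted mask into (start bit, run length).
static bool isShiftedMask32(uint32_t M, unsigned &Start, unsigned &Len) {
  if (M == 0)
    return false;
  Start = (unsigned)__builtin_ctz(M); // trailing zeros locate the run
  uint32_t Run = M >> Start;
  if (Run & (Run + 1))                // Run must look like 0b0...01...1
    return false;
  Len = (unsigned)__builtin_popcount(Run);
  return true;
}

int main() {
  unsigned Start = 0, Len = 0;
  assert(isShiftedMask32(0x00000FF0u, Start, Len) && Start == 4 && Len == 8);
  assert(!isShiftedMask32(0x00000FA0u, Start, Len)); // broken run of ones
  return 0;
}
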
@@ -12919,21 +13341,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
- // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
- // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
- // When the adde's carry is not used.
- if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
- N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
- // We only do for addcarry before legalize operation
- ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
- TLI.isOperationLegal(N0.getOpcode(), VT))) {
- SDLoc SL(N);
- auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
- auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
- auto VTs = DAG.getVTList(VT, N0->getValueType(1));
- return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
- }
-
// fold (truncate (extract_subvector(ext x))) ->
// (extract_subvector x)
// TODO: This can be generalized to cover cases where the truncate and extract
@@ -12978,6 +13385,22 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
}
}
break;
+ case ISD::ADDE:
+ case ISD::ADDCARRY:
+ // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
+ // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
+ // When the adde's carry is not used.
+ // We only do for addcarry before legalize operation
+ if (((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
+ TLI.isOperationLegal(N0.getOpcode(), VT)) &&
+ N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
+ SDLoc DL(N);
+ SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
+ SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
+ SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
+ return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2));
+ }
+ break;
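
The relocated ADDE/ADDCARRY case above is sound because truncation distributes over addition, including a carry-in, as long as the wide carry-out has no uses. A standalone check (plain C++):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t X = 0xFFFFFFFF12345678ull, Y = 0x00000001CAFEF00Dull;
  for (uint32_t CarryIn = 0; CarryIn <= 1; ++CarryIn) {
    uint32_t NarrowFirst = (uint32_t)X + (uint32_t)Y + CarryIn;
    uint32_t WideFirst = (uint32_t)(X + Y + CarryIn);
    assert(NarrowFirst == WideFirst); // carry-out of the wide add is unused
  }
  return 0;
}
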
case ISD::USUBSAT:
      // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
      // enough to know that the upper bits are zero, we must ensure that we don't
@@ -13111,7 +13534,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
(!LegalTypes ||
(!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
TLI.isTypeLegal(VT.getVectorElementType()))) &&
- N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
+ N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
cast<BuildVectorSDNode>(N0)->isConstant())
return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
VT.getVectorElementType());
@@ -13179,8 +13602,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// This often reduces constant pool loads.
if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
(N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
- N0.getNode()->hasOneUse() && VT.isInteger() &&
- !VT.isVector() && !N0.getValueType().isVector()) {
+ N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
+ !N0.getValueType().isVector()) {
SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
AddToWorklist(NewConv.getNode());
@@ -13228,9 +13651,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// (xor (bitcast cst), (bitcast x)), 0),
// signbit)
// (xor (bitcast cst) (build_pair flipbit, flipbit))
- if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
- isa<ConstantFPSDNode>(N0.getOperand(0)) &&
- VT.isInteger() && !VT.isVector()) {
+ if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
+ isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() &&
+ !VT.isVector()) {
unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
if (isTypeLegal(IntXVT)) {
@@ -13312,8 +13735,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
if (Op.getOpcode() == ISD::BITCAST &&
Op.getOperand(0).getValueType() == VT)
return SDValue(Op.getOperand(0));
- if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
- ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
+ if (Op.isUndef() || isAnyConstantBuildVector(Op))
return DAG.getBitcast(VT, Op);
return SDValue();
};
@@ -13353,6 +13775,14 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
return N0;
+ // Fold freeze(bitcast(x)) -> bitcast(freeze(x)).
+ // TODO: Replace with pushFreezeToPreventPoisonFromPropagating fold.
+ if (N0.getOpcode() == ISD::BITCAST)
+ return DAG.getBitcast(N->getValueType(0),
+ DAG.getNode(ISD::FREEZE, SDLoc(N0),
+ N0.getOperand(0).getValueType(),
+ N0.getOperand(0)));
+
return SDValue();
}
@@ -13444,7 +13874,7 @@ static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
// Returns true if `N` can assume no infinities involved in its computation.
static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
- return Options.NoInfsFPMath || N.getNode()->getFlags().hasNoInfs();
+ return Options.NoInfsFPMath || N->getFlags().hasNoInfs();
}
/// Try to perform FMA combining on a given FADD node.
@@ -13498,7 +13928,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
// prefer to fold the multiply with fewer uses.
if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
- if (N0.getNode()->use_size() > N1.getNode()->use_size())
+ if (N0->use_size() > N1->use_size())
std::swap(N0, N1);
}
@@ -13728,7 +14158,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
// prefer to fold the multiply with fewer uses.
if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
- (N0.getNode()->use_size() > N1.getNode()->use_size())) {
+ (N0->use_size() > N1->use_size())) {
// fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
if (SDValue V = tryToFoldXSubYZ(N0, N1))
return V;
@@ -14851,7 +15281,7 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
// fold (frem c1, c2) -> fmod(c1,c2)
if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, SDLoc(N), VT, {N0, N1}))
return C;
-
+
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -15174,7 +15604,7 @@ static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
// This means this is also safe for a signed input and unsigned output, since
// a negative input would lead to undefined behavior.
unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
- unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
+ unsigned OutputSize = (int)VT.getScalarSizeInBits();
unsigned ActualSize = std::min(InputSize, OutputSize);
const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
@@ -15265,7 +15695,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
}
// fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
- if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
+ if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse()) {
SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
N0.getOperand(0), N1);
AddToWorklist(Tmp.getNode());
@@ -15709,7 +16139,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
// If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
// out. There is no reason to make this a preinc/predec.
if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
- Ptr.getNode()->hasOneUse())
+ Ptr->hasOneUse())
return false;
// Ask the target to do addressing mode selection.
@@ -15769,8 +16199,8 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
// a copy of the original base pointer.
SmallVector<SDNode *, 16> OtherUses;
if (isa<ConstantSDNode>(Offset))
- for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
- UE = BasePtr.getNode()->use_end();
+ for (SDNode::use_iterator UI = BasePtr->use_begin(),
+ UE = BasePtr->use_end();
UI != UE; ++UI) {
SDUse &Use = UI.getUse();
// Skip the use that is Ptr and uses of other results from BasePtr's
@@ -15808,7 +16238,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
// Now check for #3 and #4.
bool RealUse = false;
- for (SDNode *Use : Ptr.getNode()->uses()) {
+ for (SDNode *Use : Ptr->uses()) {
if (Use == N)
continue;
if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
@@ -15841,7 +16271,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
++PreIndexedNodes;
++NodesCombined;
LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
- Result.getNode()->dump(&DAG); dbgs() << '\n');
+ Result.dump(&DAG); dbgs() << '\n');
WorklistRemover DeadNodes(*this);
if (IsLoad) {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
@@ -15931,7 +16361,7 @@ static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
return false;
SmallPtrSet<const SDNode *, 32> Visited;
- for (SDNode *Use : BasePtr.getNode()->uses()) {
+ for (SDNode *Use : BasePtr->uses()) {
if (Use == Ptr.getNode())
continue;
@@ -15968,7 +16398,7 @@ static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
const TargetLowering &TLI) {
if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
IsMasked, Ptr, TLI) ||
- Ptr.getNode()->hasOneUse())
+ Ptr->hasOneUse())
return nullptr;
// Try turning it into a post-indexed load / store except when
@@ -16028,9 +16458,8 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
BasePtr, Offset, AM);
++PostIndexedNodes;
++NodesCombined;
- LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
- dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
- dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
+ Result.dump(&DAG); dbgs() << '\n');
WorklistRemover DeadNodes(*this);
if (IsLoad) {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
@@ -16271,7 +16700,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// Now we replace use of chain2 with chain1. This makes the second load
// isomorphic to the one we are deleting, and thus makes this load live.
LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
- dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
+ dbgs() << "\nWith chain: "; Chain.dump(&DAG);
dbgs() << "\n");
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
@@ -16302,7 +16731,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
} else
Index = DAG.getUNDEF(N->getValueType(1));
LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
- dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
+ dbgs() << "\nWith: "; Undef.dump(&DAG);
dbgs() << " and 2 other values\n");
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
@@ -17014,11 +17443,19 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
// Check that it is legal on the target to do this. It is legal if the new
// VT we're shrinking to (i8/i16/i32) is legal or we're still before type
- // legalization (and the target doesn't explicitly think this is a bad idea).
+ // legalization. If the source type is legal, but the store type isn't, see
+ // if we can use a truncating store.
MVT VT = MVT::getIntegerVT(NumBytes * 8);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (!DC->isTypeLegal(VT))
+ bool UseTruncStore;
+ if (DC->isTypeLegal(VT))
+ UseTruncStore = false;
+ else if (TLI.isTypeLegal(IVal.getValueType()) &&
+ TLI.isTruncStoreLegal(IVal.getValueType(), VT))
+ UseTruncStore = true;
+ else
return SDValue();
+ // Check that the target doesn't think this is a bad idea.
if (St->getMemOperand() &&
!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
*St->getMemOperand()))
@@ -17046,10 +17483,15 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL);
}
+ ++OpsNarrowed;
+ if (UseTruncStore)
+ return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
+ St->getPointerInfo().getWithOffset(StOffset),
+ VT, St->getOriginalAlign());
+
// Truncate down to the new size.
IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
- ++OpsNarrowed;
return DAG
.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
St->getPointerInfo().getWithOffset(StOffset),
@@ -17070,11 +17512,15 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
SDValue Ptr = ST->getBasePtr();
EVT VT = Value.getValueType();
- if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
+ if (ST->isTruncatingStore() || VT.isVector())
return SDValue();
unsigned Opc = Value.getOpcode();
+ if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
+ !Value.hasOneUse())
+ return SDValue();
+
// If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
// is a byte mask indicating a consecutive number of bytes, check to see if
// Y is known to provide just those bytes. If so, we try to replace the
@@ -17099,8 +17545,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
if (!EnableReduceLoadOpStoreWidth)
return SDValue();
- if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
- Value.getOperand(1).getOpcode() != ISD::Constant)
+ if (Value.getOperand(1).getOpcode() != ISD::Constant)
return SDValue();
SDValue N0 = Value.getOperand(0);
@@ -17256,14 +17701,13 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
// (A + c1) * c3
// (A + c2) * c3
// We're checking for cases where we have common "c3 * A" expressions.
-bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
- SDValue &AddNode,
- SDValue &ConstNode) {
+bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
+ SDValue ConstNode) {
APInt Val;
// If the add only has one use, and the target thinks the folding is
// profitable or does not lead to worse code, this would be OK to do.
- if (AddNode.getNode()->hasOneUse() &&
+ if (AddNode->hasOneUse() &&
TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
return true;
@@ -17397,7 +17841,9 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
if (isa<ConstantFPSDNode>(Val)) {
// Not clear how to truncate FP values.
return false;
- } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
+ }
+
+ if (auto *C = dyn_cast<ConstantSDNode>(Val))
Val = DAG.getConstant(C->getAPIntValue()
.zextOrTrunc(Val.getValueSizeInBits())
.zextOrTrunc(ElementSizeBits),
@@ -17491,7 +17937,7 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
if (!UseTrunc) {
NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(),
- FirstInChain->getAlign(), Flags.getValue(), AAInfo);
+ FirstInChain->getAlign(), *Flags, AAInfo);
} else { // Must be realized as a trunc store
EVT LegalizedStoredValTy =
TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
@@ -17503,7 +17949,7 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
NewStore = DAG.getTruncStore(
NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
- FirstInChain->getAlign(), Flags.getValue(), AAInfo);
+ FirstInChain->getAlign(), *Flags, AAInfo);
}
// Replace all merged stores with the new store.
@@ -17671,11 +18117,9 @@ void DAGCombiner::getStoreMergeCandidates(
}
}
-// We need to check that merging these stores does not cause a loop in
-// the DAG. Any store candidate may depend on another candidate
-// indirectly through its operand (we already consider dependencies
-// through the chain). Check in parallel by searching up from
-// non-chain operands of candidates.
+// We need to check that merging these stores does not cause a loop in the
+// DAG. Any store candidate may depend on another candidate indirectly through
+// its operands. Check in parallel by searching up from operands of candidates.
bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
SDNode *RootNode) {
@@ -17709,8 +18153,13 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
SDNode *N = StoreNodes[i].MemNode;
// Of the 4 Store Operands:
// * Chain (Op 0) -> We have already considered these
- // in candidate selection and can be
- // safely ignored
+ // in candidate selection, but only by following the
+ // chain dependencies. We could still have a chain
+ // dependency to a load, that has a non-chain dep to
+ // another load, that depends on a store, etc. So it is
+ // possible to have dependencies that consist of a mix
+ // of chain and non-chain deps, and we need to include
+  //                        chain operands in the analysis here.
// * Value (Op 1) -> Cycles may happen (e.g. through load chains)
// * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
  //                        but aren't necessarily from the same base node, so
@@ -17718,7 +18167,7 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
// * (Op 3) -> Represents the pre or post-indexing offset (or undef for
// non-indexed stores). Not constant on all targets (e.g. ARM)
// and so can participate in a cycle.
- for (unsigned j = 1; j < N->getNumOperands(); ++j)
+ for (unsigned j = 0; j < N->getNumOperands(); ++j)
Worklist.push_back(N->getOperand(j).getNode());
}
// Search through DAG. We can stop early if we find a store node.
@@ -17793,7 +18242,7 @@ bool DAGCombiner::tryStoreMergeOfConstants(
while (NumConsecutiveStores >= 2) {
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
- unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ Align FirstStoreAlign = FirstInChain->getAlign();
unsigned LastLegalType = 1;
unsigned LastLegalVectorType = 1;
bool LastIntegerTrunc = false;
@@ -17881,7 +18330,7 @@ bool DAGCombiner::tryStoreMergeOfConstants(
unsigned NumSkip = 1;
while ((NumSkip < NumConsecutiveStores) &&
(NumSkip < FirstZeroAfterNonZero) &&
- (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
+ (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
NumSkip++;
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
@@ -17920,7 +18369,7 @@ bool DAGCombiner::tryStoreMergeOfExtracts(
while (NumConsecutiveStores >= 2) {
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
- unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ Align FirstStoreAlign = FirstInChain->getAlign();
unsigned NumStoresToMerge = 1;
for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
// Find a legal type for the vector store.
@@ -17951,7 +18400,7 @@ bool DAGCombiner::tryStoreMergeOfExtracts(
// improved. Drop as many candidates as we can here.
unsigned NumSkip = 1;
while ((NumSkip < NumConsecutiveStores) &&
- (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
+ (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
NumSkip++;
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
@@ -18248,7 +18697,7 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
for (unsigned i = 0; i < NumElem; ++i) {
SDValue Val = StoreNodes[i].MemNode->getOperand(1);
CombineTo(StoreNodes[i].MemNode, NewStore);
- if (Val.getNode()->use_empty())
+ if (Val->use_empty())
recursivelyDeleteUnusedNodes(Val.getNode());
}
@@ -18398,6 +18847,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
default:
llvm_unreachable("Unknown FP type");
case MVT::f16: // We don't do this for these yet.
+ case MVT::bf16:
case MVT::f80:
case MVT::f128:
case MVT::ppcf128:
@@ -18405,7 +18855,6 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
case MVT::f32:
if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
- ;
Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
bitcastToAPInt().getZExtValue(), SDLoc(CFP),
MVT::i32);
@@ -18417,7 +18866,6 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
ST->isSimple()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
- ;
Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
getZExtValue(), SDLoc(CFP), MVT::i64);
return DAG.getStore(Chain, DL, Tmp,
@@ -18611,7 +19059,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// truncating store. We can do this even if this is already a truncstore.
if ((Value.getOpcode() == ISD::FP_ROUND ||
Value.getOpcode() == ISD::TRUNCATE) &&
- Value.getNode()->hasOneUse() && ST->isUnindexed() &&
+ Value->hasOneUse() && ST->isUnindexed() &&
TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
ST->getMemoryVT(), LegalOperations)) {
return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
@@ -18874,6 +19322,14 @@ SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
}
}
+ // If we failed to find a match, see if we can replace an UNDEF shuffle
+ // operand.
+ if (ElementOffset == -1 && Y.isUndef() &&
+ InsertVal0.getValueType() == Y.getValueType()) {
+ ElementOffset = Mask.size();
+ Y = InsertVal0;
+ }
+
if (ElementOffset != -1) {
SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
@@ -18972,10 +19428,9 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
if (VT.isScalableVector())
return DAG.getSplatVector(VT, DL, InVal);
- else {
- SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
- return DAG.getBuildVector(VT, DL, Ops);
- }
+
+ SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
+ return DAG.getBuildVector(VT, DL, Ops);
}
return SDValue();
}
@@ -18987,9 +19442,19 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
// We must know which element is being inserted for folds below here.
unsigned Elt = IndexC->getZExtValue();
+
if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
return Shuf;
+ // Handle <1 x ???> vector insertion special cases.
+ if (VT.getVectorNumElements() == 1) {
+ // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
+ if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ InVal.getOperand(0).getValueType() == VT &&
+ isNullConstant(InVal.getOperand(1)))
+ return InVal.getOperand(0);
+ }
+
// Canonicalize insert_vector_elt dag nodes.
// Example:
// (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
@@ -19010,36 +19475,84 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
}
}
- // If we can't generate a legal BUILD_VECTOR, exit
- if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
- return SDValue();
+ // Attempt to fold the insertion into a legal BUILD_VECTOR.
+ if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
+ auto UpdateBuildVector = [&](SmallVectorImpl<SDValue> &Ops) {
+ assert(Ops.size() == NumElts && "Unexpected vector size");
- // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
- // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
- // vector elements.
- SmallVector<SDValue, 8> Ops;
- // Do not combine these two vectors if the output vector will not replace
- // the input vector.
- if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
- Ops.append(InVec.getNode()->op_begin(),
- InVec.getNode()->op_end());
- } else if (InVec.isUndef()) {
- Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
- } else {
- return SDValue();
- }
- assert(Ops.size() == NumElts && "Unexpected vector size");
+ // Insert the element
+ if (Elt < Ops.size()) {
+ // All the operands of BUILD_VECTOR must have the same type;
+ // we enforce that here.
+ EVT OpVT = Ops[0].getValueType();
+ Ops[Elt] =
+ OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
+ }
+
+ // Return the new vector
+ return DAG.getBuildVector(VT, DL, Ops);
+ };
+
+ // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
+ // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
+ // vector elements.
+ SmallVector<SDValue, 8> Ops;
+
+ // Do not combine these two vectors if the output vector will not replace
+ // the input vector.
+ if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
+ Ops.append(InVec->op_begin(), InVec->op_end());
+ return UpdateBuildVector(Ops);
+ }
+
+ if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && InVec.hasOneUse()) {
+ Ops.push_back(InVec.getOperand(0));
+ Ops.append(NumElts - 1, DAG.getUNDEF(InVec.getOperand(0).getValueType()));
+ return UpdateBuildVector(Ops);
+ }
+
+ if (InVec.isUndef()) {
+ Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
+ return UpdateBuildVector(Ops);
+ }
+
+  // If we're inserting at the end of a vector as part of a sequence, see
+ // if we can create a BUILD_VECTOR by following the sequence back up the
+ // chain.
+ if (Elt == (NumElts - 1)) {
+ SmallVector<SDValue> ReverseInsertions;
+ ReverseInsertions.push_back(InVal);
+
+ EVT MaxEltVT = InVal.getValueType();
+ SDValue CurVec = InVec;
+ for (unsigned I = 1; I != NumElts; ++I) {
+ if (CurVec.getOpcode() != ISD::INSERT_VECTOR_ELT || !CurVec.hasOneUse())
+ break;
- // Insert the element
- if (Elt < Ops.size()) {
- // All the operands of BUILD_VECTOR must have the same type;
- // we enforce that here.
- EVT OpVT = Ops[0].getValueType();
- Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
+ auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2));
+ if (!CurIdx || CurIdx->getAPIntValue() != ((NumElts - 1) - I))
+ break;
+ SDValue CurVal = CurVec.getOperand(1);
+ ReverseInsertions.push_back(CurVal);
+ if (VT.isInteger()) {
+ EVT CurValVT = CurVal.getValueType();
+ MaxEltVT = MaxEltVT.bitsGE(CurValVT) ? MaxEltVT : CurValVT;
+ }
+ CurVec = CurVec.getOperand(0);
+ }
+
+ if (ReverseInsertions.size() == NumElts) {
+ for (unsigned I = 0; I != NumElts; ++I) {
+ SDValue Val = ReverseInsertions[(NumElts - 1) - I];
+ Val = VT.isInteger() ? DAG.getAnyExtOrTrunc(Val, DL, MaxEltVT) : Val;
+ Ops.push_back(Val);
+ }
+ return DAG.getBuildVector(VT, DL, Ops);
+ }
+ }
}
- // Return the new vector
- return DAG.getBuildVector(VT, DL, Ops);
+ return SDValue();
}
SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
@@ -19088,47 +19601,33 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
InVecVT, EltNo);
- // The replacement we need to do here is a little tricky: we need to
- // replace an extractelement of a load with a load.
- // Use ReplaceAllUsesOfValuesWith to do the replacement.
- // Note that this replacement assumes that the extractvalue is the only
- // use of the load; that's okay because we don't want to perform this
- // transformation in other cases anyway.
+ // We are replacing a vector load with a scalar load. The new load must have
+ // identical memory op ordering to the original.
SDValue Load;
- SDValue Chain;
if (ResultVT.bitsGT(VecEltVT)) {
// If the result type of vextract is wider than the load, then issue an
// extending load instead.
- ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
- VecEltVT)
- ? ISD::ZEXTLOAD
- : ISD::EXTLOAD;
- Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
- OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
- Alignment, OriginalLoad->getMemOperand()->getFlags(),
+ ISD::LoadExtType ExtType =
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT) ? ISD::ZEXTLOAD
+ : ISD::EXTLOAD;
+ Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
+ NewPtr, MPI, VecEltVT, Alignment,
+ OriginalLoad->getMemOperand()->getFlags(),
OriginalLoad->getAAInfo());
- Chain = Load.getValue(1);
+ DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
} else {
- Load = DAG.getLoad(
- VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, Alignment,
- OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo());
- Chain = Load.getValue(1);
+    // The result type is no wider than the vector element type.
+ Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
+ Alignment, OriginalLoad->getMemOperand()->getFlags(),
+ OriginalLoad->getAAInfo());
+ DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
if (ResultVT.bitsLT(VecEltVT))
- Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
+ Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
else
Load = DAG.getBitcast(ResultVT, Load);
}
- WorklistRemover DeadNodes(*this);
- SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
- SDValue To[] = { Load, Chain };
- DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
- // Make sure to revisit this node to clean it up; it will usually be dead.
- AddToWorklist(EVE);
- // Since we're explicitly calling ReplaceAllUses, add the new node to the
- // worklist explicitly as well.
- AddToWorklistWithUsers(Load.getNode());
++OpsNarrowed;
- return SDValue(EVE, 0);
+ return Load;
}
/// Transform a vector binary operation into a scalar binary operation by moving
@@ -19140,7 +19639,7 @@ static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
SDValue Index = ExtElt->getOperand(1);
auto *IndexC = dyn_cast<ConstantSDNode>(Index);
if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
- Vec.getNode()->getNumValues() != 1)
+ Vec->getNumValues() != 1)
return SDValue();
// Targets may want to avoid this to prevent an expensive register transfer.
@@ -19196,8 +19695,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// EXTRACT_VECTOR_ELT may widen the extracted vector.
SDValue InOp = VecOp.getOperand(0);
if (InOp.getValueType() != ScalarVT) {
- assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
- return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
+ assert(InOp.getValueType().isInteger() && ScalarVT.isInteger() &&
+ InOp.getValueType().bitsGT(ScalarVT));
+ return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
}
return InOp;
}
@@ -19655,7 +20155,7 @@ SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
if (!isa<ConstantSDNode>(ShiftAmtVal))
return SDValue();
- uint64_t ShiftAmt = In.getNode()->getConstantOperandVal(1);
+ uint64_t ShiftAmt = In.getConstantOperandVal(1);
// The extracted value is not extracted at the right position
if (ShiftAmt != i * ScalarTypeBitsize)
@@ -20096,18 +20596,39 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
int Left = 2 * In;
int Right = 2 * In + 1;
SmallVector<int, 8> Mask(NumElems, -1);
- for (unsigned i = 0; i != NumElems; ++i) {
- if (VectorMask[i] == Left) {
- Mask[i] = i;
- VectorMask[i] = In;
- } else if (VectorMask[i] == Right) {
- Mask[i] = i + NumElems;
- VectorMask[i] = In;
+ SDValue L = Shuffles[Left];
+ ArrayRef<int> LMask;
+ bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ L.use_empty() && L.getOperand(1).isUndef() &&
+ L.getOperand(0).getValueType() == L.getValueType();
+ if (IsLeftShuffle) {
+ LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
+ L = L.getOperand(0);
+ }
+ SDValue R = Shuffles[Right];
+ ArrayRef<int> RMask;
+ bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ R.use_empty() && R.getOperand(1).isUndef() &&
+ R.getOperand(0).getValueType() == R.getValueType();
+ if (IsRightShuffle) {
+ RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
+ R = R.getOperand(0);
+ }
+ for (unsigned I = 0; I != NumElems; ++I) {
+ if (VectorMask[I] == Left) {
+ Mask[I] = I;
+ if (IsLeftShuffle)
+ Mask[I] = LMask[I];
+ VectorMask[I] = In;
+ } else if (VectorMask[I] == Right) {
+ Mask[I] = I + NumElems;
+ if (IsRightShuffle)
+ Mask[I] = RMask[I] + NumElems;
+ VectorMask[I] = In;
}
}
- Shuffles[In] =
- DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
+ Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask);
}
}
return Shuffles[0];
@@ -20695,7 +21216,7 @@ static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue BinOp = Extract->getOperand(0);
unsigned BinOpcode = BinOp.getOpcode();
- if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
+ if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
return SDValue();
EVT VecVT = BinOp.getValueType();
@@ -20744,7 +21265,7 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
unsigned BOpcode = BinOp.getOpcode();
- if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
+ if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
return SDValue();
// Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
@@ -20803,8 +21324,8 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
BinOp.getOperand(0), NewExtIndex);
SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
BinOp.getOperand(1), NewExtIndex);
- SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
- BinOp.getNode()->getFlags());
+ SDValue NarrowBinOp =
+ DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
return DAG.getBitcast(VT, NarrowBinOp);
}
@@ -21085,6 +21606,12 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
}
}
+ // ty1 extract_vector(ty2 splat(V))) -> ty1 splat(V)
+ if (V.getOpcode() == ISD::SPLAT_VECTOR)
+ if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
+ if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
+ return DAG.getSplatVector(NVT, SDLoc(N), V.getOperand(0));
+
// Try to move vector bitcast after extract_subv by scaling extraction index:
// extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
if (V.getOpcode() == ISD::BITCAST &&
@@ -21450,9 +21977,10 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
if (SVT != VT.getScalarType())
for (SDValue &Op : Ops)
- Op = TLI.isZExtFree(Op.getValueType(), SVT)
- ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
- : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
+ Op = Op.isUndef() ? DAG.getUNDEF(SVT)
+ : (TLI.isZExtFree(Op.getValueType(), SVT)
+ ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
+ : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
}
@@ -21582,6 +22110,13 @@ static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
SelectionDAG &DAG) {
if (!Shuf->getOperand(1).isUndef())
return SDValue();
+
+  // If the inner operand is a known splat with no undefs, just return that
+  // directly.
+ // TODO: Create DemandedElts mask from Shuf's mask.
+ // TODO: Allow undef elements and merge with the shuffle code below.
+ if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
+ return Shuf->getOperand(0);
+
auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
if (!Splat || !Splat->isSplat())
return SDValue();
@@ -21628,6 +22163,53 @@ static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
NewMask);
}
+// Combine shuffles of bitcasts into a shuffle of the bitcast type, provided
+// the mask can be treated as a mask for the larger element type.
+static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ bool LegalOperations) {
+ SDValue Op0 = SVN->getOperand(0);
+ SDValue Op1 = SVN->getOperand(1);
+ EVT VT = SVN->getValueType(0);
+ if (Op0.getOpcode() != ISD::BITCAST)
+ return SDValue();
+ EVT InVT = Op0.getOperand(0).getValueType();
+ if (!InVT.isVector() ||
+ (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST ||
+ Op1.getOperand(0).getValueType() != InVT)))
+ return SDValue();
+ if (isAnyConstantBuildVector(Op0.getOperand(0)) &&
+ (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0))))
+ return SDValue();
+
+ int VTLanes = VT.getVectorNumElements();
+ int InLanes = InVT.getVectorNumElements();
+ if (VTLanes <= InLanes || VTLanes % InLanes != 0 ||
+ (LegalOperations &&
+ !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, InVT)))
+ return SDValue();
+ int Factor = VTLanes / InLanes;
+
+  // Check that each group of lanes in the mask is either undef or makes a
+  // valid mask for the wider lane type.
+ ArrayRef<int> Mask = SVN->getMask();
+ SmallVector<int> NewMask;
+ if (!widenShuffleMaskElts(Factor, Mask, NewMask))
+ return SDValue();
+
+ if (!TLI.isShuffleMaskLegal(NewMask, InVT))
+ return SDValue();
+
+ // Create the new shuffle with the new mask and bitcast it back to the
+ // original type.
+ SDLoc DL(SVN);
+ Op0 = Op0.getOperand(0);
+ Op1 = Op1.isUndef() ? DAG.getUNDEF(InVT) : Op1.getOperand(0);
+ SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask);
+ return DAG.getBitcast(VT, NewShuf);
+}
+
/// Combine shuffle of shuffle of the form:
/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
@@ -21839,7 +22421,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
int SplatIndex = SVN->getSplatIndex();
if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
- TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
+ TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
// splat (vector_bo L, R), Index -->
// splat (scalar_bo (extelt L, Index), (extelt R, Index))
SDValue L = N0.getOperand(0), R = N0.getOperand(1);
@@ -21848,13 +22430,26 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
- SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
- N0.getNode()->getFlags());
+ SDValue NewBO =
+ DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
}
+ // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
+ // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) &&
+ N0.hasOneUse()) {
+ if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0)
+ return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0));
+
+ if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT)
+ if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
+ if (Idx->getAPIntValue() == SplatIndex)
+ return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
+ }
+
// If this is a bit convert that changes the element type of the vector but
// not the number of vector elements, look through it. Be careful not to
// look though conversions that change things like v4f32 to v2f64.
@@ -22078,6 +22673,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
}
+  // Match shuffles of bitcasts, so long as the mask can be widened to the
+  // larger element type.
+ if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations))
+ return V;
+
// Compute the combined shuffle mask for a shuffle with SV0 as the first
// operand, and SV1 as the second operand.
// i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
@@ -22409,6 +23009,11 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
return N1.getOperand(0);
+ // Simplify scalar inserts into an undef vector:
+ // insert_subvector undef, (splat X), N2 -> splat X
+ if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
+ return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
+
// If we are inserting a bitcast value into an undef, with the same
// number of elements, just use the bitcast input of the extract.
// i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
@@ -22556,6 +23161,16 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+
+ // fold (fp_to_bf16 (bf16_to_fp op)) -> op
+ if (N0->getOpcode() == ISD::BF16_TO_FP)
+ return N0->getOperand(0);
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N0.getValueType();
@@ -22583,6 +23198,19 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
}
+ // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
+ // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
+ if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
+ SDValue Vec = N0.getOperand(0);
+ SDValue Subvec = N0.getOperand(1);
+ if ((Opcode == ISD::VECREDUCE_OR &&
+ (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) ||
+ (Opcode == ISD::VECREDUCE_AND &&
+ (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec))))
+ return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
+ }
+
return SDValue();
}
@@ -22886,7 +23514,7 @@ SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
// Check to see if we got a select_cc back (to turn into setcc/select).
// Otherwise, just return whatever node we got back, like fabs.
if (SCC.getOpcode() == ISD::SELECT_CC) {
- const SDNodeFlags Flags = N0.getNode()->getFlags();
+ const SDNodeFlags Flags = N0->getFlags();
SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
N0.getValueType(),
SCC.getOperand(0), SCC.getOperand(1),
@@ -23556,6 +24184,27 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
return SDValue();
}
+/// Given an ISD::SREM node expressing a remainder by constant power of 2,
+/// return a DAG expression that will generate the same value.
+SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
+ ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
+ if (!C)
+ return SDValue();
+
+ // Avoid division by zero.
+ if (C->isZero())
+ return SDValue();
+
+ SmallVector<SDNode *, 8> Built;
+ if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
+ for (SDNode *N : Built)
+ AddToWorklist(N);
+ return S;
+ }
+
+ return SDValue();
+}
+
/// Determines the LogBase2 value for a non-null input value using the
/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
@@ -23865,9 +24514,8 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
auto &Size0 = MUC0.NumBytes;
auto &Size1 = MUC1.NumBytes;
if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
- Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 &&
- OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
- SrcValOffset1 % *Size1 == 0) {
+ Size0 && Size1 && *Size0 == *Size1 && OrigAlignment0 > *Size0 &&
+ SrcValOffset0 % *Size0 == 0 && SrcValOffset1 % *Size1 == 0) {
int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
@@ -23886,8 +24534,8 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
UseAA = false;
#endif
- if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
- Size0.hasValue() && Size1.hasValue()) {
+ if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() && Size0 &&
+ Size1) {
// Use alias analysis information.
int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
@@ -23920,7 +24568,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
unsigned Depth = 0;
// Attempt to improve chain by a single step
- std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
+ auto ImproveChain = [&](SDValue &C) -> bool {
switch (C.getOpcode()) {
case ISD::EntryToken:
// No need to mark EntryToken.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 87a1ebe4c1db..ff5779967e22 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -72,7 +72,6 @@
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -94,7 +93,6 @@
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -1408,16 +1406,6 @@ bool FastISel::selectCast(const User *I, unsigned Opcode) {
}
bool FastISel::selectBitCast(const User *I) {
- // If the bitcast doesn't change the type, just use the operand value.
- if (I->getType() == I->getOperand(0)->getType()) {
- Register Reg = getRegForValue(I->getOperand(0));
- if (!Reg)
- return false;
- updateValueMap(I, Reg);
- return true;
- }
-
- // Bitcasts of other values become reg-reg copies or BITCAST operators.
EVT SrcEVT = TLI.getValueType(DL, I->getOperand(0)->getType());
EVT DstEVT = TLI.getValueType(DL, I->getType());
if (SrcEVT == MVT::Other || DstEVT == MVT::Other ||
@@ -1431,23 +1419,14 @@ bool FastISel::selectBitCast(const User *I) {
if (!Op0) // Unhandled operand. Halt "fast" selection and bail.
return false;
- // First, try to perform the bitcast by inserting a reg-reg copy.
- Register ResultReg;
+ // If the bitcast doesn't change the type, just use the operand value.
if (SrcVT == DstVT) {
- const TargetRegisterClass *SrcClass = TLI.getRegClassFor(SrcVT);
- const TargetRegisterClass *DstClass = TLI.getRegClassFor(DstVT);
- // Don't attempt a cross-class copy. It will likely fail.
- if (SrcClass == DstClass) {
- ResultReg = createResultReg(DstClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg).addReg(Op0);
- }
+ updateValueMap(I, Op0);
+ return true;
}
- // If the reg-reg copy failed, select a BITCAST opcode.
- if (!ResultReg)
- ResultReg = fastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0);
-
+ // Otherwise, select a BITCAST opcode.
+ Register ResultReg = fastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0);
if (!ResultReg)
return false;
@@ -2251,6 +2230,11 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) {
if (!MRI.hasOneUse(LoadReg))
return false;
+ // If the register has fixups, there may be additional uses through a
+ // different alias of the register.
+ if (FuncInfo.RegsWithFixups.contains(LoadReg))
+ return false;
+
MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LoadReg);
MachineInstr *User = RI->getParent();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 85c6eca5775e..aa9c77f9cabf 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -31,13 +31,10 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetOptions.h"
#include <algorithm>
using namespace llvm;
@@ -57,7 +54,7 @@ static bool isUsedOutsideOfDefiningBlock(const Instruction *I) {
return false;
}
-static ISD::NodeType getPreferredExtendForValue(const Value *V) {
+static ISD::NodeType getPreferredExtendForValue(const Instruction *I) {
// For the users of the source value being used for compare instruction, if
// the number of signed predicate is greater than unsigned predicate, we
// prefer to use SIGN_EXTEND.
@@ -67,7 +64,7 @@ static ISD::NodeType getPreferredExtendForValue(const Value *V) {
// can be exposed.
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
unsigned NumOfSigned = 0, NumOfUnsigned = 0;
- for (const User *U : V->users()) {
+ for (const User *U : I->users()) {
if (const auto *CI = dyn_cast<CmpInst>(U)) {
NumOfSigned += CI->isSigned();
NumOfUnsigned += CI->isUnsigned();
@@ -448,9 +445,14 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
IntVT = TLI->getTypeToTransformTo(PN->getContext(), IntVT);
unsigned BitWidth = IntVT.getSizeInBits();
- Register DestReg = ValueMap[PN];
- if (!Register::isVirtualRegister(DestReg))
+ auto It = ValueMap.find(PN);
+ if (It == ValueMap.end())
return;
+
+ Register DestReg = It->second;
+ if (DestReg == 0)
+    return;
+
+ assert(Register::isVirtualRegister(DestReg) && "Expected a virtual reg");
LiveOutRegInfo.grow(DestReg);
LiveOutInfo &DestLOI = LiveOutRegInfo[DestReg];
@@ -462,7 +464,11 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
}
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- APInt Val = CI->getValue().zextOrTrunc(BitWidth);
+ APInt Val;
+ if (TLI->signExtendConstant(CI))
+ Val = CI->getValue().sext(BitWidth);
+ else
+ Val = CI->getValue().zext(BitWidth);
DestLOI.NumSignBits = Val.getNumSignBits();
DestLOI.Known = KnownBits::makeConstant(Val);
} else {
@@ -494,7 +500,11 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
}
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- APInt Val = CI->getValue().zextOrTrunc(BitWidth);
+ APInt Val;
+ if (TLI->signExtendConstant(CI))
+ Val = CI->getValue().sext(BitWidth);
+ else
+ Val = CI->getValue().zext(BitWidth);
DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, Val.getNumSignBits());
DestLOI.Known.Zero &= ~Val;
DestLOI.Known.One &= Val;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index e3e05c868102..3d3b504c6abd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -14,22 +14,18 @@
#include "InstrEmitter.h"
#include "SDNodeDbgValue.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/PseudoProbe.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -321,8 +317,15 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB,
OpRC = TII->getRegClass(*II, IIOpNum, TRI, *MF);
if (OpRC) {
+ unsigned MinNumRegs = MinRCSize;
+ // Don't apply any RC size limit for IMPLICIT_DEF. Each use has a unique
+ // virtual register.
+ if (Op.isMachineOpcode() &&
+ Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF)
+ MinNumRegs = 0;
+
const TargetRegisterClass *ConstrainedRC
- = MRI->constrainRegClass(VReg, OpRC, MinRCSize);
+ = MRI->constrainRegClass(VReg, OpRC, MinNumRegs);
if (!ConstrainedRC) {
OpRC = TRI->getAllocatableClass(OpRC);
assert(OpRC && "Constraints cannot be fulfilled for allocation");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 54481b94fdd8..8bdc9410d131 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -13,6 +13,7 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
@@ -45,7 +46,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include <algorithm>
#include <cassert>
#include <cstdint>
#include <tuple>
@@ -142,12 +142,10 @@ private:
RTLIB::Libcall Call_F128,
RTLIB::Libcall Call_PPCF128,
SmallVectorImpl<SDValue> &Results);
- SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
- RTLIB::Libcall Call_I8,
- RTLIB::Libcall Call_I16,
- RTLIB::Libcall Call_I32,
- RTLIB::Libcall Call_I64,
- RTLIB::Libcall Call_I128);
+ SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8,
+ RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32,
+ RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128,
+ RTLIB::Libcall Call_IEXT);
void ExpandArgFPLibCall(SDNode *Node,
RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
@@ -1000,6 +998,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
break;
case ISD::FP_TO_FP16:
+ case ISD::FP_TO_BF16:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::EXTRACT_VECTOR_ELT:
@@ -1036,14 +1035,18 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS:
case ISD::SETCC:
+ case ISD::VP_SETCC:
case ISD::BR_CC: {
- unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 :
- Node->getOpcode() == ISD::STRICT_FSETCC ? 3 :
- Node->getOpcode() == ISD::STRICT_FSETCCS ? 3 :
- Node->getOpcode() == ISD::SETCC ? 2 : 1;
- unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 :
- Node->getOpcode() == ISD::STRICT_FSETCC ? 1 :
- Node->getOpcode() == ISD::STRICT_FSETCCS ? 1 : 0;
+ unsigned Opc = Node->getOpcode();
+ unsigned CCOperand = Opc == ISD::SELECT_CC ? 4
+ : Opc == ISD::STRICT_FSETCC ? 3
+ : Opc == ISD::STRICT_FSETCCS ? 3
+ : (Opc == ISD::SETCC || Opc == ISD::VP_SETCC) ? 2
+ : 1;
+ unsigned CompareOperand = Opc == ISD::BR_CC ? 2
+ : Opc == ISD::STRICT_FSETCC ? 1
+ : Opc == ISD::STRICT_FSETCCS ? 1
+ : 0;
MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType();
ISD::CondCode CCCode =
cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get();
@@ -1174,6 +1177,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Node->getOpcode(),
cast<VPStoreSDNode>(Node)->getValue().getValueType());
break;
+ case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
+ Action = TLI.getOperationAction(
+ Node->getOpcode(),
+ cast<VPStridedStoreSDNode>(Node)->getValue().getValueType());
+ break;
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_ADD:
@@ -1187,6 +1195,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
+ case ISD::IS_FPCLASS:
Action = TLI.getOperationAction(
Node->getOpcode(), Node->getOperand(0).getValueType());
break;
@@ -1212,7 +1221,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
break;
default:
if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
- Action = TargetLowering::Legal;
+ Action = TLI.getCustomOperationAction(*Node);
} else {
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
}
@@ -1723,16 +1732,14 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
EVT DestVT, const SDLoc &dl,
SDValue Chain) {
- unsigned SrcSize = SrcOp.getValueSizeInBits();
- unsigned SlotSize = SlotVT.getSizeInBits();
- unsigned DestSize = DestVT.getSizeInBits();
+ EVT SrcVT = SrcOp.getValueType();
Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
Align DestAlign = DAG.getDataLayout().getPrefTypeAlign(DestType);
// Don't convert with stack if the load/store is expensive.
- if ((SrcSize > SlotSize &&
+ if ((SrcVT.bitsGT(SlotVT) &&
!TLI.isTruncStoreLegalOrCustom(SrcOp.getValueType(), SlotVT)) ||
- (SlotSize < DestSize &&
+ (SlotVT.bitsLT(DestVT) &&
!TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, DestVT, SlotVT)))
return SDValue();
@@ -1750,20 +1757,19 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
// later than DestVT.
SDValue Store;
- if (SrcSize > SlotSize)
+ if (SrcVT.bitsGT(SlotVT))
Store = DAG.getTruncStore(Chain, dl, SrcOp, FIPtr, PtrInfo,
SlotVT, SrcAlign);
else {
- assert(SrcSize == SlotSize && "Invalid store");
- Store =
- DAG.getStore(Chain, dl, SrcOp, FIPtr, PtrInfo, SrcAlign);
+ assert(SrcVT.bitsEq(SlotVT) && "Invalid store");
+ Store = DAG.getStore(Chain, dl, SrcOp, FIPtr, PtrInfo, SrcAlign);
}
// Result is a load from the stack slot.
- if (SlotSize == DestSize)
+ if (SlotVT.bitsEq(DestVT))
return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, DestAlign);
- assert(SlotSize < DestSize && "Unknown extension!");
+ assert(SlotVT.bitsLT(DestVT) && "Unknown extension!");
return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, PtrInfo, SlotVT,
DestAlign);
}
@@ -2101,15 +2107,17 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
ExpandFPLibCall(Node, LC, Results);
}
-SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
- RTLIB::Libcall Call_I8,
- RTLIB::Libcall Call_I16,
- RTLIB::Libcall Call_I32,
- RTLIB::Libcall Call_I64,
- RTLIB::Libcall Call_I128) {
+SDValue SelectionDAGLegalize::ExpandIntLibCall(
+ SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8,
+ RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64,
+ RTLIB::Libcall Call_I128, RTLIB::Libcall Call_IEXT) {
RTLIB::Libcall LC;
switch (Node->getSimpleValueType(0).SimpleTy) {
- default: llvm_unreachable("Unexpected request for libcall!");
+
+ default:
+ LC = Call_IEXT;
+ break;
+
case MVT::i8: LC = Call_I8; break;
case MVT::i16: LC = Call_I16; break;
case MVT::i32: LC = Call_I32; break;
@@ -2144,7 +2152,11 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
RTLIB::Libcall LC;
switch (Node->getSimpleValueType(0).SimpleTy) {
- default: llvm_unreachable("Unexpected request for libcall!");
+
+ default:
+ LC = isSigned ? RTLIB::SDIVREM_IEXT : RTLIB::UDIVREM_IEXT;
+ break;
+
case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
@@ -2893,6 +2905,18 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Node->getValueType(0), dl)))
Results.push_back(Tmp1);
break;
+ case ISD::BF16_TO_FP: {
+ // Always expand bf16 to f32 casts, they lower to ext + shift.
+ SDValue Op = DAG.getNode(ISD::BITCAST, dl, MVT::i16, Node->getOperand(0));
+ Op = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op);
+ Op = DAG.getNode(
+ ISD::SHL, dl, MVT::i32, Op,
+ DAG.getConstant(16, dl,
+ TLI.getShiftAmountTy(MVT::i32, DAG.getDataLayout())));
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op);
+ Results.push_back(Op);
+ break;
+ }
case ISD::SIGN_EXTEND_INREG: {
EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
EVT VT = Node->getValueType(0);
@@ -2904,7 +2928,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// SIGN_EXTEND_INREG does not guarantee that the high bits are already zero.
// TODO: Do this for vectors too?
- if (ExtraVT.getSizeInBits() == 1) {
+ if (ExtraVT.isScalarInteger() && ExtraVT.getSizeInBits() == 1) {
SDValue One = DAG.getConstant(1, dl, VT);
SDValue And = DAG.getNode(ISD::AND, dl, VT, Node->getOperand(0), One);
SDValue Zero = DAG.getConstant(0, dl, VT);
@@ -3135,6 +3159,15 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::FABS:
Results.push_back(ExpandFABS(Node));
break;
+ case ISD::IS_FPCLASS: {
+ auto CNode = cast<ConstantSDNode>(Node->getOperand(1));
+ auto Test = static_cast<FPClassTest>(CNode->getZExtValue());
+ if (SDValue Expanded =
+ TLI.expandIS_FPCLASS(Node->getValueType(0), Node->getOperand(0),
+ Test, Node->getFlags(), SDLoc(Node), DAG))
+ Results.push_back(Expanded);
+ break;
+ }
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
@@ -3577,18 +3610,26 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp1);
break;
case ISD::SETCC:
+ case ISD::VP_SETCC:
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS: {
- bool IsStrict = Node->getOpcode() != ISD::SETCC;
+ bool IsVP = Node->getOpcode() == ISD::VP_SETCC;
+ bool IsStrict = Node->getOpcode() == ISD::STRICT_FSETCC ||
+ Node->getOpcode() == ISD::STRICT_FSETCCS;
bool IsSignaling = Node->getOpcode() == ISD::STRICT_FSETCCS;
SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue();
unsigned Offset = IsStrict ? 1 : 0;
Tmp1 = Node->getOperand(0 + Offset);
Tmp2 = Node->getOperand(1 + Offset);
Tmp3 = Node->getOperand(2 + Offset);
- bool Legalized =
- TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), Tmp1, Tmp2, Tmp3,
- NeedInvert, dl, Chain, IsSignaling);
+ SDValue Mask, EVL;
+ if (IsVP) {
+ Mask = Node->getOperand(3 + Offset);
+ EVL = Node->getOperand(4 + Offset);
+ }
+ bool Legalized = TLI.LegalizeSetCCCondCode(
+ DAG, Node->getValueType(0), Tmp1, Tmp2, Tmp3, Mask, EVL, NeedInvert, dl,
+ Chain, IsSignaling);
if (Legalized) {
// If we expanded the SETCC by swapping LHS and RHS, or by inverting the
@@ -3598,6 +3639,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp1 = DAG.getNode(Node->getOpcode(), dl, Node->getVTList(),
{Chain, Tmp1, Tmp2, Tmp3}, Node->getFlags());
Chain = Tmp1.getValue(1);
+ } else if (IsVP) {
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0),
+ {Tmp1, Tmp2, Tmp3, Mask, EVL}, Node->getFlags());
} else {
Tmp1 = DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Tmp1,
Tmp2, Tmp3, Node->getFlags());
@@ -3606,8 +3650,13 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// If we expanded the SETCC by inverting the condition code, then wrap
// the existing SETCC in a NOT to restore the intended condition.
- if (NeedInvert)
- Tmp1 = DAG.getLogicalNOT(dl, Tmp1, Tmp1->getValueType(0));
+ if (NeedInvert) {
+ if (!IsVP)
+ Tmp1 = DAG.getLogicalNOT(dl, Tmp1, Tmp1->getValueType(0));
+ else
+ Tmp1 =
+ DAG.getVPLogicalNOT(dl, Tmp1, Mask, EVL, Tmp1->getValueType(0));
+ }
Results.push_back(Tmp1);
if (IsStrict)
@@ -3622,21 +3671,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// Otherwise, SETCC for the given comparison type must be completely
// illegal; expand it into a SELECT_CC.
+ // FIXME: This drops the mask/evl for VP_SETCC.
EVT VT = Node->getValueType(0);
- int TrueValue;
- switch (TLI.getBooleanContents(Tmp1.getValueType())) {
- case TargetLowering::ZeroOrOneBooleanContent:
- case TargetLowering::UndefinedBooleanContent:
- TrueValue = 1;
- break;
- case TargetLowering::ZeroOrNegativeOneBooleanContent:
- TrueValue = -1;
- break;
- }
+ EVT Tmp1VT = Tmp1.getValueType();
Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2,
- DAG.getConstant(TrueValue, dl, VT),
- DAG.getConstant(0, dl, VT),
- Tmp3);
+ DAG.getBoolConstant(true, dl, VT, Tmp1VT),
+ DAG.getBoolConstant(false, dl, VT, Tmp1VT), Tmp3);
Tmp1->setFlags(Node->getFlags());
Results.push_back(Tmp1);
break;
@@ -3692,7 +3732,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
if (!Legalized) {
Legalized = TLI.LegalizeSetCCCondCode(
DAG, getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC,
- NeedInvert, dl, Chain);
+ /*Mask*/ SDValue(), /*EVL*/ SDValue(), NeedInvert, dl, Chain);
assert(Legalized && "Can't legalize SELECT_CC with legal condition!");
@@ -3725,9 +3765,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp3 = Node->getOperand(3); // RHS
Tmp4 = Node->getOperand(1); // CC
- bool Legalized =
- TLI.LegalizeSetCCCondCode(DAG, getSetCCResultType(Tmp2.getValueType()),
- Tmp2, Tmp3, Tmp4, NeedInvert, dl, Chain);
+ bool Legalized = TLI.LegalizeSetCCCondCode(
+ DAG, getSetCCResultType(Tmp2.getValueType()), Tmp2, Tmp3, Tmp4,
+ /*Mask*/ SDValue(), /*EVL*/ SDValue(), NeedInvert, dl, Chain);
(void)Legalized;
assert(Legalized && "Can't legalize BR_CC with legal condition!");
@@ -4068,12 +4108,25 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fpowi.");
if (!TLI.getLibcallName(LC)) {
// Some targets don't have a powi libcall; use pow instead.
- SDValue Exponent = DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node),
- Node->getValueType(0),
- Node->getOperand(1));
- Results.push_back(DAG.getNode(ISD::FPOW, SDLoc(Node),
- Node->getValueType(0), Node->getOperand(0),
- Exponent));
+ if (Node->isStrictFPOpcode()) {
+ SDValue Exponent =
+ DAG.getNode(ISD::STRICT_SINT_TO_FP, SDLoc(Node),
+ {Node->getValueType(0), Node->getValueType(1)},
+ {Node->getOperand(0), Node->getOperand(2)});
+ SDValue FPOW =
+ DAG.getNode(ISD::STRICT_FPOW, SDLoc(Node),
+ {Node->getValueType(0), Node->getValueType(1)},
+ {Exponent.getValue(1), Node->getOperand(1), Exponent});
+ Results.push_back(FPOW);
+ Results.push_back(FPOW.getValue(1));
+ } else {
+ SDValue Exponent =
+ DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), Node->getValueType(0),
+ Node->getOperand(1));
+ Results.push_back(DAG.getNode(ISD::FPOW, SDLoc(Node),
+ Node->getValueType(0),
+ Node->getOperand(0), Exponent));
+ }
break;
}
unsigned Offset = Node->isStrictFPOpcode() ? 1 : 0;
@@ -4176,6 +4229,13 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
Results.push_back(ExpandLibCall(LC, Node, false));
break;
}
+ case ISD::FP_TO_BF16: {
+ RTLIB::Libcall LC =
+ RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::bf16);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_bf16");
+ Results.push_back(ExpandLibCall(LC, Node, false));
+ break;
+ }
case ISD::STRICT_SINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
case ISD::SINT_TO_FP:
@@ -4315,28 +4375,24 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::SUB_PPCF128, Results);
break;
case ISD::SREM:
- Results.push_back(ExpandIntLibCall(Node, true,
- RTLIB::SREM_I8,
- RTLIB::SREM_I16, RTLIB::SREM_I32,
- RTLIB::SREM_I64, RTLIB::SREM_I128));
+ Results.push_back(ExpandIntLibCall(
+ Node, true, RTLIB::SREM_I8, RTLIB::SREM_I16, RTLIB::SREM_I32,
+ RTLIB::SREM_I64, RTLIB::SREM_I128, RTLIB::SREM_IEXT));
break;
case ISD::UREM:
- Results.push_back(ExpandIntLibCall(Node, false,
- RTLIB::UREM_I8,
- RTLIB::UREM_I16, RTLIB::UREM_I32,
- RTLIB::UREM_I64, RTLIB::UREM_I128));
+ Results.push_back(ExpandIntLibCall(
+ Node, false, RTLIB::UREM_I8, RTLIB::UREM_I16, RTLIB::UREM_I32,
+ RTLIB::UREM_I64, RTLIB::UREM_I128, RTLIB::UREM_IEXT));
break;
case ISD::SDIV:
- Results.push_back(ExpandIntLibCall(Node, true,
- RTLIB::SDIV_I8,
- RTLIB::SDIV_I16, RTLIB::SDIV_I32,
- RTLIB::SDIV_I64, RTLIB::SDIV_I128));
+ Results.push_back(ExpandIntLibCall(
+ Node, true, RTLIB::SDIV_I8, RTLIB::SDIV_I16, RTLIB::SDIV_I32,
+ RTLIB::SDIV_I64, RTLIB::SDIV_I128, RTLIB::SDIV_IEXT));
break;
case ISD::UDIV:
- Results.push_back(ExpandIntLibCall(Node, false,
- RTLIB::UDIV_I8,
- RTLIB::UDIV_I16, RTLIB::UDIV_I32,
- RTLIB::UDIV_I64, RTLIB::UDIV_I128));
+ Results.push_back(ExpandIntLibCall(
+ Node, false, RTLIB::UDIV_I8, RTLIB::UDIV_I16, RTLIB::UDIV_I32,
+ RTLIB::UDIV_I64, RTLIB::UDIV_I128, RTLIB::UDIV_IEXT));
break;
case ISD::SDIVREM:
case ISD::UDIVREM:
@@ -4344,10 +4400,9 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
ExpandDivRemLibCall(Node, Results);
break;
case ISD::MUL:
- Results.push_back(ExpandIntLibCall(Node, false,
- RTLIB::MUL_I8,
- RTLIB::MUL_I16, RTLIB::MUL_I32,
- RTLIB::MUL_I64, RTLIB::MUL_I128));
+ Results.push_back(ExpandIntLibCall(
+ Node, false, RTLIB::MUL_I8, RTLIB::MUL_I16, RTLIB::MUL_I32,
+ RTLIB::MUL_I64, RTLIB::MUL_I128, RTLIB::MUL_IEXT));
break;
case ISD::CTLZ_ZERO_UNDEF:
switch (Node->getSimpleValueType(0).SimpleTy) {
@@ -4700,6 +4755,12 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
Tmp3, DAG.getIntPtrConstant(0, dl)));
break;
+ case ISD::STRICT_FADD:
+ case ISD::STRICT_FSUB:
+ case ISD::STRICT_FMUL:
+ case ISD::STRICT_FDIV:
+ case ISD::STRICT_FMINNUM:
+ case ISD::STRICT_FMAXNUM:
case ISD::STRICT_FREM:
case ISD::STRICT_FPOW:
Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
@@ -4724,6 +4785,22 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Tmp3),
DAG.getIntPtrConstant(0, dl)));
break;
+ case ISD::STRICT_FMA:
+ Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(1)});
+ Tmp2 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(2)});
+ Tmp3 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(3)});
+ Tmp4 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Tmp1.getValue(1),
+ Tmp2.getValue(1), Tmp3.getValue(1));
+ Tmp4 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other},
+ {Tmp4, Tmp1, Tmp2, Tmp3});
+ Tmp4 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other},
+ {Tmp4.getValue(1), Tmp4, DAG.getIntPtrConstant(0, dl)});
+ Results.push_back(Tmp4);
+ Results.push_back(Tmp4.getValue(1));
+ break;
case ISD::FCOPYSIGN:
case ISD::FPOWI: {
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
@@ -4740,6 +4817,16 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Tmp3, DAG.getIntPtrConstant(isTrunc, dl)));
break;
}
+ case ISD::STRICT_FPOWI:
+ Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(1)});
+ Tmp2 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other},
+ {Tmp1.getValue(1), Tmp1, Node->getOperand(2)});
+ Tmp3 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other},
+ {Tmp2.getValue(1), Tmp2, DAG.getIntPtrConstant(0, dl)});
+ Results.push_back(Tmp3);
+ Results.push_back(Tmp3.getValue(1));
+ break;
case ISD::FFLOOR:
case ISD::FCEIL:
case ISD::FRINT:
@@ -4764,12 +4851,19 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
break;
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FRINT:
+ case ISD::STRICT_FNEARBYINT:
case ISD::STRICT_FROUND:
+ case ISD::STRICT_FROUNDEVEN:
+ case ISD::STRICT_FTRUNC:
+ case ISD::STRICT_FSQRT:
case ISD::STRICT_FSIN:
case ISD::STRICT_FCOS:
case ISD::STRICT_FLOG:
+ case ISD::STRICT_FLOG2:
case ISD::STRICT_FLOG10:
case ISD::STRICT_FEXP:
+ case ISD::STRICT_FEXP2:
Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
{Node->getOperand(0), Node->getOperand(1)});
Tmp2 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other},
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 6bf38d7296a8..f464208cd9dc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -273,6 +273,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) {
+ if (SDValue SelCC = TLI.createSelectForFMINNUM_FMAXNUM(N, DAG))
+ return SoftenFloatRes_SELECT_CC(SelCC.getNode());
return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
RTLIB::FMIN_F32,
RTLIB::FMIN_F64,
@@ -282,6 +284,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) {
+ if (SDValue SelCC = TLI.createSelectForFMINNUM_FMAXNUM(N, DAG))
+ return SoftenFloatRes_SELECT_CC(SelCC.getNode());
return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
RTLIB::FMAX_F32,
RTLIB::FMAX_F64,
@@ -830,6 +834,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;
case ISD::STRICT_FP_TO_FP16:
case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes
+ case ISD::FP_TO_BF16:
case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
case ISD::STRICT_FP_TO_SINT:
@@ -881,16 +886,19 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
// returns an i16 so doesn't meet the constraints necessary for FP_ROUND.
assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16 ||
N->getOpcode() == ISD::STRICT_FP_TO_FP16 ||
+ N->getOpcode() == ISD::FP_TO_BF16 ||
N->getOpcode() == ISD::STRICT_FP_ROUND);
bool IsStrict = N->isStrictFPOpcode();
SDValue Op = N->getOperand(IsStrict ? 1 : 0);
EVT SVT = Op.getValueType();
EVT RVT = N->getValueType(0);
- EVT FloatRVT = (N->getOpcode() == ISD::FP_TO_FP16 ||
- N->getOpcode() == ISD::STRICT_FP_TO_FP16)
- ? MVT::f16
- : RVT;
+ EVT FloatRVT = RVT;
+ if (N->getOpcode() == ISD::FP_TO_FP16 ||
+ N->getOpcode() == ISD::STRICT_FP_TO_FP16)
+ FloatRVT = MVT::f16;
+ else if (N->getOpcode() == ISD::FP_TO_BF16)
+ FloatRVT = MVT::bf16;
RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
@@ -2064,9 +2072,13 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_LLRINT(SDNode *N) {
static ISD::NodeType GetPromotionOpcode(EVT OpVT, EVT RetVT) {
if (OpVT == MVT::f16) {
- return ISD::FP16_TO_FP;
+ return ISD::FP16_TO_FP;
} else if (RetVT == MVT::f16) {
- return ISD::FP_TO_FP16;
+ return ISD::FP_TO_FP16;
+ } else if (OpVT == MVT::bf16) {
+ return ISD::BF16_TO_FP;
+ } else if (RetVT == MVT::bf16) {
+ return ISD::FP_TO_BF16;
}
report_fatal_error("Attempt at an invalid promotion-related conversion");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 8c7b90b6cd33..69fd83bcd7b3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -78,6 +78,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SELECT:
case ISD::VSELECT:
case ISD::VP_SELECT:
+ case ISD::VP_MERGE:
Res = PromoteIntRes_Select(N);
break;
case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break;
@@ -97,6 +98,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::VP_ASHR: Res = PromoteIntRes_SRA(N); break;
case ISD::SRL:
case ISD::VP_LSHR: Res = PromoteIntRes_SRL(N); break;
+ case ISD::VP_TRUNCATE:
case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break;
case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break;
case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break;
@@ -115,11 +117,12 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::INSERT_VECTOR_ELT:
Res = PromoteIntRes_INSERT_VECTOR_ELT(N); break;
case ISD::BUILD_VECTOR:
- Res = PromoteIntRes_BUILD_VECTOR(N); break;
- case ISD::SCALAR_TO_VECTOR:
- Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break;
+ Res = PromoteIntRes_BUILD_VECTOR(N);
+ break;
case ISD::SPLAT_VECTOR:
- Res = PromoteIntRes_SPLAT_VECTOR(N); break;
+ case ISD::SCALAR_TO_VECTOR:
+ Res = PromoteIntRes_ScalarOp(N);
+ break;
case ISD::STEP_VECTOR: Res = PromoteIntRes_STEP_VECTOR(N); break;
case ISD::CONCAT_VECTORS:
Res = PromoteIntRes_CONCAT_VECTORS(N); break;
@@ -133,6 +136,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break;
+ case ISD::VP_FPTOSI:
+ case ISD::VP_FPTOUI:
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_SINT:
@@ -262,6 +267,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::FSHR:
Res = PromoteIntRes_FunnelShift(N);
break;
+
+ case ISD::IS_FPCLASS:
+ Res = PromoteIntRes_IS_FPCLASS(N);
+ break;
}
// If the result is null then the sub-method took care of registering it.
@@ -435,10 +444,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
// interesting bits will end up at the wrong place.
if (DAG.getDataLayout().isBigEndian()) {
unsigned ShiftAmt = NInVT.getSizeInBits() - InVT.getSizeInBits();
- EVT ShiftAmtTy = TLI.getShiftAmountTy(NOutVT, DAG.getDataLayout());
assert(ShiftAmt < NOutVT.getSizeInBits() && "Too large shift amount!");
Res = DAG.getNode(ISD::SRL, dl, NOutVT, Res,
- DAG.getConstant(ShiftAmt, dl, ShiftAmtTy));
+ DAG.getShiftAmountConstant(ShiftAmt, NOutVT, dl));
}
return Res;
}
@@ -446,13 +454,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
// as the widened input type would be a legal type, we can widen the bitcast
// and handle the promotion after.
if (NOutVT.isVector()) {
- unsigned WidenInSize = NInVT.getSizeInBits();
- unsigned OutSize = OutVT.getSizeInBits();
- if (WidenInSize % OutSize == 0) {
- unsigned Scale = WidenInSize / OutSize;
- EVT WideOutVT = EVT::getVectorVT(*DAG.getContext(),
- OutVT.getVectorElementType(),
- OutVT.getVectorNumElements() * Scale);
+ TypeSize WidenInSize = NInVT.getSizeInBits();
+ TypeSize OutSize = OutVT.getSizeInBits();
+ if (WidenInSize.hasKnownScalarFactor(OutSize)) {
+ unsigned Scale = WidenInSize.getKnownScalarFactor(OutSize);
+ EVT WideOutVT =
+ EVT::getVectorVT(*DAG.getContext(), OutVT.getVectorElementType(),
+ OutVT.getVectorElementCount() * Scale);
if (isTypeLegal(WideOutVT)) {
InOp = DAG.getBitcast(WideOutVT, GetWidenedVector(InOp));
InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, InOp,
@@ -490,9 +498,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
}
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
- EVT ShiftVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
- DAG.getConstant(DiffBits, dl, ShiftVT));
+ DAG.getShiftAmountConstant(DiffBits, NVT, dl));
}
SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
@@ -512,10 +519,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
}
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
- EVT ShiftVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
return DAG.getNode(ISD::SRL, dl, NVT,
DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
- DAG.getConstant(DiffBits, dl, ShiftVT));
+ DAG.getShiftAmountConstant(DiffBits, NVT, dl));
}
SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
@@ -666,6 +672,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT))
NewOpc = ISD::STRICT_FP_TO_SINT;
+ if (N->getOpcode() == ISD::VP_FPTOUI &&
+ !TLI.isOperationLegal(ISD::VP_FPTOUI, NVT) &&
+ TLI.isOperationLegalOrCustom(ISD::VP_FPTOSI, NVT))
+ NewOpc = ISD::VP_FPTOSI;
+
SDValue Res;
if (N->isStrictFPOpcode()) {
Res = DAG.getNode(NewOpc, dl, {NVT, MVT::Other},
@@ -673,8 +684,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
- } else
+ } else if (NewOpc == ISD::VP_FPTOSI || NewOpc == ISD::VP_FPTOUI) {
+ Res = DAG.getNode(NewOpc, dl, NVT, {N->getOperand(0), N->getOperand(1),
+ N->getOperand(2)});
+ } else {
Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0));
+ }
// Assert that the converted value fits in the original type. If it doesn't
// (eg: because the value being converted is too big), then the result of the
@@ -684,8 +699,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
// before legalization: fp-to-uint16, 65534. -> 0xfffe
// after legalization: fp-to-sint32, 65534. -> 0x0000fffe
return DAG.getNode((N->getOpcode() == ISD::FP_TO_UINT ||
- N->getOpcode() == ISD::STRICT_FP_TO_UINT) ?
- ISD::AssertZext : ISD::AssertSext, dl, NVT, Res,
+ N->getOpcode() == ISD::STRICT_FP_TO_UINT ||
+ N->getOpcode() == ISD::VP_FPTOUI)
+ ? ISD::AssertZext
+ : ISD::AssertSext,
+ dl, NVT, Res,
DAG.getValueType(N->getValueType(0).getScalarType()));
}
@@ -889,8 +907,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
}
unsigned SHLAmount = NewBits - OldBits;
- EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
- SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT);
+ SDValue ShiftAmount =
+ DAG.getShiftAmountConstant(SHLAmount, PromotedType, dl);
Op1Promoted =
DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount);
if (!IsShift)
@@ -939,14 +957,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) {
// which is extends the values that we clamp to on saturation. This could be
// resolved by shifting one of the operands the same amount, which would
// also shift the result we compare against, then shifting back.
- EVT ShiftTy = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
- Op1Promoted = DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted,
- DAG.getConstant(DiffSize, dl, ShiftTy));
+ Op1Promoted =
+ DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted,
+ DAG.getShiftAmountConstant(DiffSize, PromotedType, dl));
SDValue Result = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted,
Op2Promoted, N->getOperand(2));
unsigned ShiftOp = Signed ? ISD::SRA : ISD::SRL;
return DAG.getNode(ShiftOp, dl, PromotedType, Result,
- DAG.getConstant(DiffSize, dl, ShiftTy));
+ DAG.getShiftAmountConstant(DiffSize, PromotedType, dl));
}
return DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted, Op2Promoted,
N->getOperand(2));
@@ -1043,17 +1061,17 @@ SDValue DAGTypeLegalizer::PromoteIntRes_DIVFIX(SDNode *N) {
TargetLowering::LegalizeAction Action =
TLI.getFixedPointOperationAction(N->getOpcode(), PromotedType, Scale);
if (Action == TargetLowering::Legal || Action == TargetLowering::Custom) {
- EVT ShiftTy = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
unsigned Diff = PromotedType.getScalarSizeInBits() -
N->getValueType(0).getScalarSizeInBits();
if (Saturating)
- Op1Promoted = DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted,
- DAG.getConstant(Diff, dl, ShiftTy));
+ Op1Promoted =
+ DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted,
+ DAG.getShiftAmountConstant(Diff, PromotedType, dl));
SDValue Res = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted,
Op2Promoted, N->getOperand(2));
if (Saturating)
Res = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, PromotedType, Res,
- DAG.getConstant(Diff, dl, ShiftTy));
+ DAG.getShiftAmountConstant(Diff, PromotedType, dl));
return Res;
}
}
@@ -1110,11 +1128,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Select(SDNode *N) {
SDValue RHS = GetPromotedInteger(N->getOperand(2));
unsigned Opcode = N->getOpcode();
- return Opcode == ISD::VP_SELECT
- ? DAG.getNode(Opcode, SDLoc(N), LHS.getValueType(), Mask, LHS, RHS,
- N->getOperand(3))
- : DAG.getNode(Opcode, SDLoc(N), LHS.getValueType(), Mask, LHS,
- RHS);
+ if (Opcode == ISD::VP_SELECT || Opcode == ISD::VP_MERGE)
+ return DAG.getNode(Opcode, SDLoc(N), LHS.getValueType(), Mask, LHS, RHS,
+ N->getOperand(3));
+ return DAG.getNode(Opcode, SDLoc(N), LHS.getValueType(), Mask, LHS, RHS);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) {
@@ -1167,6 +1184,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
return DAG.getSExtOrTrunc(SetCC, dl, NVT);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_IS_FPCLASS(SDNode *N) {
+ SDLoc DL(N);
+ SDValue Arg = N->getOperand(0);
+ SDValue Test = N->getOperand(1);
+ EVT NResVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getNode(ISD::IS_FPCLASS, DL, NResVT, Arg, Test);
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
SDValue LHS = GetPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
@@ -1265,7 +1290,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) {
SDValue Hi = GetPromotedInteger(N->getOperand(0));
SDValue Lo = GetPromotedInteger(N->getOperand(1));
- SDValue Amt = GetPromotedInteger(N->getOperand(2));
+ SDValue Amt = N->getOperand(2);
+ if (getTypeAction(Amt.getValueType()) == TargetLowering::TypePromoteInteger)
+ Amt = ZExtPromotedInteger(Amt);
+ EVT AmtVT = Amt.getValueType();
SDLoc DL(N);
EVT OldVT = N->getOperand(0).getValueType();
@@ -1276,7 +1304,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) {
unsigned NewBits = VT.getScalarSizeInBits();
// Amount has to be interpreted modulo the old bit width.
- Amt = DAG.getNode(ISD::UREM, DL, VT, Amt, DAG.getConstant(OldBits, DL, VT));
+ Amt = DAG.getNode(ISD::UREM, DL, AmtVT, Amt,
+ DAG.getConstant(OldBits, DL, AmtVT));
// If the promoted type is twice the size (or more), then we use the
// traditional funnel 'double' shift codegen. This isn't necessary if the
@@ -1296,13 +1325,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) {
}
// Shift Lo up to occupy the upper bits of the promoted type.
- SDValue ShiftOffset = DAG.getConstant(NewBits - OldBits, DL, VT);
+ SDValue ShiftOffset = DAG.getConstant(NewBits - OldBits, DL, AmtVT);
Lo = DAG.getNode(ISD::SHL, DL, VT, Lo, ShiftOffset);
// Increase Amount to shift the result into the lower bits of the promoted
// type.
if (IsFSHR)
- Amt = DAG.getNode(ISD::ADD, DL, VT, Amt, ShiftOffset);
+ Amt = DAG.getNode(ISD::ADD, DL, AmtVT, Amt, ShiftOffset);
return DAG.getNode(Opcode, DL, VT, Hi, Lo, Amt);
}
@@ -1336,11 +1365,23 @@ SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(),
NumElts.divideCoefficientBy(2));
- EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1);
- EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2);
-
+ if (N->getOpcode() == ISD::TRUNCATE) {
+ EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1);
+ EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2);
+ } else {
+ assert(N->getOpcode() == ISD::VP_TRUNCATE &&
+ "Expected VP_TRUNCATE opcode");
+ SDValue MaskLo, MaskHi, EVLLo, EVLHi;
+ std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1));
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl);
+ EOp1 = DAG.getNode(ISD::VP_TRUNCATE, dl, HalfNVT, EOp1, MaskLo, EVLLo);
+ EOp2 = DAG.getNode(ISD::VP_TRUNCATE, dl, HalfNVT, EOp2, MaskHi, EVLHi);
+ }
return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, EOp1, EOp2);
}
+  // TODO: VP_TRUNCATE needs to handle the TypeWidenVector case for some
+  // targets.
case TargetLowering::TypeWidenVector: {
SDValue WideInOp = GetWidenedVector(InOp);
@@ -1362,6 +1403,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
}
// Truncate to NVT instead of VT
+ if (N->getOpcode() == ISD::VP_TRUNCATE)
+ return DAG.getNode(ISD::VP_TRUNCATE, dl, NVT, Res, N->getOperand(1),
+ N->getOperand(2));
return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res);
}
@@ -1432,6 +1476,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO_CARRY(SDNode *N,
}
SDValue DAGTypeLegalizer::PromoteIntRes_ABS(SDNode *N) {
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+
+ // If a larger ABS or SMAX isn't supported by the target, try to expand now.
+  // If we expand later, we'll end up sign-extending more than just the sra
+  // input in the sra+xor+sub expansion.
+ if (!OVT.isVector() &&
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::ABS, NVT) &&
+ !TLI.isOperationLegal(ISD::SMAX, NVT)) {
+ if (SDValue Res = TLI.expandABS(N, DAG))
+ return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), NVT, Res);
+ }
+
SDValue Op0 = SExtPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::ABS, SDLoc(N), Op0.getValueType(), Op0);
}
@@ -1466,9 +1523,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
if (N->getOpcode() == ISD::UMULO) {
// Unsigned overflow occurred if the high part is non-zero.
unsigned Shift = SmallVT.getScalarSizeInBits();
- EVT ShiftTy = TLI.getShiftAmountTy(Mul.getValueType(), DAG.getDataLayout());
- SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
- DAG.getConstant(Shift, DL, ShiftTy));
+ SDValue Hi =
+ DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
+ DAG.getShiftAmountConstant(Shift, Mul.getValueType(), DL));
Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,
DAG.getConstant(0, DL, Hi.getValueType()),
ISD::SETNE);
@@ -1498,7 +1555,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VSCALE(SDNode *N) {
EVT VT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
APInt MulImm = cast<ConstantSDNode>(N->getOperand(0))->getAPIntValue();
- return DAG.getVScale(SDLoc(N), VT, MulImm.sextOrSelf(VT.getSizeInBits()));
+ return DAG.getVScale(SDLoc(N), VT, MulImm.sext(VT.getSizeInBits()));
}
SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
@@ -1578,16 +1635,19 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::CONCAT_VECTORS: Res = PromoteIntOp_CONCAT_VECTORS(N); break;
case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::INSERT_VECTOR_ELT:
- Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break;
- case ISD::SCALAR_TO_VECTOR:
- Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break;
+ Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);
+ break;
case ISD::SPLAT_VECTOR:
- Res = PromoteIntOp_SPLAT_VECTOR(N); break;
+ case ISD::SCALAR_TO_VECTOR:
+ Res = PromoteIntOp_ScalarOp(N);
+ break;
case ISD::VSELECT:
case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break;
case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break;
+ case ISD::VP_SETCC:
case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break;
case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break;
+ case ISD::VP_SITOFP:
case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break;
case ISD::STRICT_SINT_TO_FP: Res = PromoteIntOp_STRICT_SINT_TO_FP(N); break;
case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
@@ -1600,8 +1660,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
OpNo); break;
case ISD::MSCATTER: Res = PromoteIntOp_MSCATTER(cast<MaskedScatterSDNode>(N),
OpNo); break;
+ case ISD::VP_TRUNCATE:
case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
case ISD::FP16_TO_FP:
+ case ISD::VP_UITOFP:
case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
case ISD::STRICT_UINT_TO_FP: Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break;
case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
@@ -1614,6 +1676,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::ROTL:
case ISD::ROTR: Res = PromoteIntOp_Shift(N); break;
+ case ISD::FSHL:
+ case ISD::FSHR: Res = PromoteIntOp_FunnelShift(N); break;
+
case ISD::SADDO_CARRY:
case ISD::SSUBO_CARRY:
case ISD::ADDCARRY:
@@ -1848,20 +1913,13 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N,
N->getOperand(1), Idx), 0);
}
-SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) {
- // Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote
- // the operand in place.
+SDValue DAGTypeLegalizer::PromoteIntOp_ScalarOp(SDNode *N) {
+ // Integer SPLAT_VECTOR/SCALAR_TO_VECTOR operands are implicitly truncated,
+ // so just promote the operand in place.
return SDValue(DAG.UpdateNodeOperands(N,
GetPromotedInteger(N->getOperand(0))), 0);
}
-SDValue DAGTypeLegalizer::PromoteIntOp_SPLAT_VECTOR(SDNode *N) {
- // Integer SPLAT_VECTOR operands are implicitly truncated, so just promote the
- // operand in place.
- return SDValue(
- DAG.UpdateNodeOperands(N, GetPromotedInteger(N->getOperand(0))), 0);
-}
-
SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
assert(OpNo == 0 && "Only know how to promote the condition!");
SDValue Cond = N->getOperand(0);
@@ -1900,7 +1958,14 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) {
PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(2))->get());
// The CC (#2) is always legal.
- return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2)), 0);
+ if (N->getOpcode() == ISD::SETCC)
+ return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2)), 0);
+
+ assert(N->getOpcode() == ISD::VP_SETCC && "Expected VP_SETCC opcode");
+
+ return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2),
+ N->getOperand(3), N->getOperand(4)),
+ 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) {
@@ -1908,6 +1973,11 @@ SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) {
ZExtPromotedInteger(N->getOperand(1))), 0);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_FunnelShift(SDNode *N) {
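+  // Only the shift amount (operand 2) needs promotion here; it is
+  // zero-extended so its numeric value is preserved exactly, since funnel
+  // shifts interpret the amount modulo the bit width of the data operands.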
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1),
+ ZExtPromotedInteger(N->getOperand(2))), 0);
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
SDLoc dl(N);
@@ -1917,6 +1987,11 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {
+ if (N->getOpcode() == ISD::VP_SITOFP)
+ return SDValue(DAG.UpdateNodeOperands(N,
+ SExtPromotedInteger(N->getOperand(0)),
+ N->getOperand(1), N->getOperand(2)),
+ 0);
return SDValue(DAG.UpdateNodeOperands(N,
SExtPromotedInteger(N->getOperand(0))), 0);
}
@@ -1980,8 +2055,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N,
SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N,
unsigned OpNo) {
-
SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
+
if (OpNo == 2) {
// The Mask
EVT DataVT = N->getValueType(0);
@@ -2010,6 +2085,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
unsigned OpNo) {
bool TruncateStore = N->isTruncatingStore();
SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
+
if (OpNo == 2) {
// The Mask
EVT DataVT = N->getValue().getValueType();
@@ -2021,9 +2097,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
else
NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo));
-
- N->setIndexType(TLI.getCanonicalIndexType(N->getIndexType(),
- N->getMemoryVT(), NewOps[OpNo]));
} else {
NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
TruncateStore = true;
@@ -2036,10 +2109,18 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
+ if (N->getOpcode() == ISD::VP_TRUNCATE)
+ return DAG.getNode(ISD::VP_TRUNCATE, SDLoc(N), N->getValueType(0), Op,
+ N->getOperand(1), N->getOperand(2));
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op);
}
SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) {
+ if (N->getOpcode() == ISD::VP_UITOFP)
+ return SDValue(DAG.UpdateNodeOperands(N,
+ ZExtPromotedInteger(N->getOperand(0)),
+ N->getOperand(1), N->getOperand(2)),
+ 0);
return SDValue(DAG.UpdateNodeOperands(N,
ZExtPromotedInteger(N->getOperand(0))), 0);
}
@@ -2468,7 +2549,7 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt,
EVT ShTy = N->getOperand(1).getValueType();
if (N->getOpcode() == ISD::SHL) {
- if (Amt.ugt(VTBits)) {
+ if (Amt.uge(VTBits)) {
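+    // uge rather than ugt: a shift amount equal to the bit width is also
+    // oversized and yields zero here.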
Lo = Hi = DAG.getConstant(0, DL, NVT);
} else if (Amt.ugt(NVTBits)) {
Lo = DAG.getConstant(0, DL, NVT);
@@ -2489,7 +2570,7 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt,
}
if (N->getOpcode() == ISD::SRL) {
- if (Amt.ugt(VTBits)) {
+ if (Amt.uge(VTBits)) {
Lo = Hi = DAG.getConstant(0, DL, NVT);
} else if (Amt.ugt(NVTBits)) {
Lo = DAG.getNode(ISD::SRL, DL,
@@ -2510,7 +2591,7 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt,
}
assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
- if (Amt.ugt(VTBits)) {
+ if (Amt.uge(VTBits)) {
Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
DAG.getConstant(NVTBits - 1, DL, ShTy));
} else if (Amt.ugt(NVTBits)) {
@@ -3132,24 +3213,23 @@ void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) {
GetExpandedInteger(N0, Lo, Hi);
EVT NVT = Lo.getValueType();
- // If we have ADDCARRY, use the expanded form of the sra+add+xor sequence we
- // use in LegalizeDAG. The ADD part of the expansion is based on
- // ExpandIntRes_ADDSUB which also uses ADDCARRY/UADDO after checking that
- // ADDCARRY is LegalOrCustom. Each of the pieces here can be further expanded
+ // If we have SUBCARRY, use the expanded form of the sra+xor+sub sequence we
+ // use in LegalizeDAG. The SUB part of the expansion is based on
+ // ExpandIntRes_ADDSUB which also uses SUBCARRY/USUBO after checking that
+ // SUBCARRY is LegalOrCustom. Each of the pieces here can be further expanded
// if needed. Shift expansion has a special case for filling with sign bits
// so that we will only end up with one SRA.
- bool HasAddCarry = TLI.isOperationLegalOrCustom(
- ISD::ADDCARRY, TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
- if (HasAddCarry) {
- EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
- SDValue Sign =
- DAG.getNode(ISD::SRA, dl, NVT, Hi,
- DAG.getConstant(NVT.getSizeInBits() - 1, dl, ShiftAmtTy));
+ bool HasSubCarry = TLI.isOperationLegalOrCustom(
+ ISD::SUBCARRY, TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
+ if (HasSubCarry) {
+ SDValue Sign = DAG.getNode(
+ ISD::SRA, dl, NVT, Hi,
+ DAG.getShiftAmountConstant(NVT.getSizeInBits() - 1, NVT, dl));
SDVTList VTList = DAG.getVTList(NVT, getSetCCResultType(NVT));
- Lo = DAG.getNode(ISD::UADDO, dl, VTList, Lo, Sign);
- Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Hi, Sign, Lo.getValue(1));
Lo = DAG.getNode(ISD::XOR, dl, NVT, Lo, Sign);
Hi = DAG.getNode(ISD::XOR, dl, NVT, Hi, Sign);
+ Lo = DAG.getNode(ISD::USUBO, dl, VTList, Lo, Sign);
+ Hi = DAG.getNode(ISD::SUBCARRY, dl, VTList, Hi, Sign, Lo.getValue(1));
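+    // Net effect across the two halves: abs(x) = (x ^ sign) - sign, with
+    // sign = x >> (bits - 1) and the borrow propagated by USUBO/SUBCARRY.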
return;
}
@@ -3160,8 +3240,8 @@ void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue NegLo, NegHi;
SplitInteger(Neg, NegLo, NegHi);
- SDValue HiIsNeg = DAG.getSetCC(dl, getSetCCResultType(NVT),
- DAG.getConstant(0, dl, NVT), Hi, ISD::SETGT);
+ SDValue HiIsNeg = DAG.getSetCC(dl, getSetCCResultType(NVT), Hi,
+ DAG.getConstant(0, dl, NVT), ISD::SETLT);
Lo = DAG.getSelect(dl, NVT, HiIsNeg, NegLo, Lo);
Hi = DAG.getSelect(dl, NVT, HiIsNeg, NegHi, Hi);
}
@@ -3223,12 +3303,11 @@ void DAGTypeLegalizer::ExpandIntRes_FLT_ROUNDS(SDNode *N, SDValue &Lo,
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned NBitWidth = NVT.getSizeInBits();
- EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
Lo = DAG.getNode(ISD::FLT_ROUNDS_, dl, {NVT, MVT::Other}, N->getOperand(0));
SDValue Chain = Lo.getValue(1);
// The high part is the sign of Lo, as -1 is a valid value for FLT_ROUNDS
Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
- DAG.getConstant(NBitWidth - 1, dl, ShiftAmtTy));
+ DAG.getShiftAmountConstant(NBitWidth - 1, NVT, dl));
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
@@ -3535,8 +3614,7 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
SDValue T = DAG.getNode(ISD::MUL, dl, NVT, LLL, RLL);
SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask);
- EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
- SDValue Shift = DAG.getConstant(HalfBits, dl, ShiftAmtTy);
+ SDValue Shift = DAG.getShiftAmountConstant(HalfBits, NVT, dl);
SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift);
SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift);
SDValue RLH = DAG.getNode(ISD::SRL, dl, NVT, RL, Shift);
@@ -3667,7 +3745,6 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
unsigned NVTSize = NVT.getScalarSizeInBits();
assert((VTSize == NVTSize * 2) && "Expected the new value type to be half "
"the size of the current value type");
- EVT ShiftTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
// After getting the multiplication result in 4 parts, we need to perform a
// shift right by the amount of the scale to get the result in that scale.
@@ -3690,7 +3767,7 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
// shifting.
uint64_t Part0 = Scale / NVTSize; // Part holding lowest bit needed.
if (Scale % NVTSize) {
- SDValue ShiftAmount = DAG.getConstant(Scale % NVTSize, dl, ShiftTy);
+ SDValue ShiftAmount = DAG.getShiftAmountConstant(Scale % NVTSize, NVT, dl);
Lo = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 1], Result[Part0],
ShiftAmount);
Hi = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 2], Result[Part0 + 1],
@@ -3731,8 +3808,9 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
if (!Signed) {
if (Scale < NVTSize) {
// Overflow happened if ((HH | (HL >> Scale)) != 0).
- SDValue HLAdjusted = DAG.getNode(ISD::SRL, dl, NVT, ResultHL,
- DAG.getConstant(Scale, dl, ShiftTy));
+ SDValue HLAdjusted =
+ DAG.getNode(ISD::SRL, dl, NVT, ResultHL,
+ DAG.getShiftAmountConstant(Scale, NVT, dl));
SDValue Tmp = DAG.getNode(ISD::OR, dl, NVT, HLAdjusted, ResultHH);
SatMax = DAG.getSetCC(dl, BoolNVT, Tmp, NVTZero, ISD::SETNE);
} else if (Scale == NVTSize) {
@@ -3740,9 +3818,9 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETNE);
} else if (Scale < VTSize) {
// Overflow happened if ((HH >> (Scale - NVTSize)) != 0).
- SDValue HLAdjusted = DAG.getNode(ISD::SRL, dl, NVT, ResultHL,
- DAG.getConstant(Scale - NVTSize, dl,
- ShiftTy));
+ SDValue HLAdjusted =
+ DAG.getNode(ISD::SRL, dl, NVT, ResultHL,
+ DAG.getShiftAmountConstant(Scale - NVTSize, NVT, dl));
SatMax = DAG.getSetCC(dl, BoolNVT, HLAdjusted, NVTZero, ISD::SETNE);
} else
llvm_unreachable("Scale must be less or equal to VTSize for UMULFIXSAT"
@@ -3901,6 +3979,70 @@ void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
ReplaceValueWith(SDValue(Node, 1), Ovf);
}
+// Emit a call to __udivei4 and friends, which require the arguments to be
+// passed on the stack, plus an extra argument giving the bit width of the
+// operands. Returns the result of the call operation.
+static SDValue ExpandExtIntRes_DIVREM(const TargetLowering &TLI,
+ const RTLIB::Libcall &LC,
+ SelectionDAG &DAG, SDNode *N,
+ const SDLoc &DL, const EVT &VT) {
+
+ SDValue InChain = DAG.getEntryNode();
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ // The signature of __udivei4 is
+ // void __udivei4(unsigned int *quo, unsigned int *a, unsigned int *b,
+ // unsigned int bits)
+ EVT ArgVT = N->op_begin()->getValueType();
+ assert(ArgVT.isInteger() && ArgVT.getSizeInBits() > 128 &&
+ "Unexpected argument type for lowering");
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+
+ SDValue Output = DAG.CreateStackTemporary(ArgVT);
+ Entry.Node = Output;
+ Entry.Ty = ArgTy->getPointerTo();
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
+ Args.push_back(Entry);
+
+ for (const llvm::SDUse &Op : N->ops()) {
+ SDValue StackPtr = DAG.CreateStackTemporary(ArgVT);
+ InChain = DAG.getStore(InChain, DL, Op, StackPtr, MachinePointerInfo());
+ Entry.Node = StackPtr;
+ Entry.Ty = ArgTy->getPointerTo();
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
+ Args.push_back(Entry);
+ }
+
+ int Bits = N->getOperand(0)
+ .getValueType()
+ .getTypeForEVT(*DAG.getContext())
+ ->getIntegerBitWidth();
+ Entry.Node = DAG.getConstant(Bits, DL, TLI.getPointerTy(DAG.getDataLayout()));
+ Entry.Ty = Type::getInt32Ty(*DAG.getContext());
+ Entry.IsSExt = false;
+ Entry.IsZExt = true;
+ Args.push_back(Entry);
+
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy(DAG.getDataLayout()));
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(DL)
+ .setChain(InChain)
+ .setLibCallee(TLI.getLibcallCallingConv(LC),
+ Type::getVoidTy(*DAG.getContext()), Callee, std::move(Args))
+ .setDiscardResult();
+
+ SDValue Chain = TLI.LowerCallTo(CLI).second;
+
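+  // The libcall returns void; the result is written through the first
+  // (output) pointer argument, so read it back from that stack temporary.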
+ return DAG.getLoad(ArgVT, DL, Chain, Output, MachinePointerInfo());
+}
+
void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
@@ -3922,6 +4064,14 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
LC = RTLIB::SDIV_I64;
else if (VT == MVT::i128)
LC = RTLIB::SDIV_I128;
+
+ else {
+ SDValue Result =
+ ExpandExtIntRes_DIVREM(TLI, RTLIB::SDIV_IEXT, DAG, N, dl, VT);
+ SplitInteger(Result, Lo, Hi);
+ return;
+ }
+
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
TargetLowering::MakeLibCallOptions CallOptions;
@@ -4113,6 +4263,14 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
LC = RTLIB::SREM_I64;
else if (VT == MVT::i128)
LC = RTLIB::SREM_I128;
+
+ else {
+ SDValue Result =
+ ExpandExtIntRes_DIVREM(TLI, RTLIB::SREM_IEXT, DAG, N, dl, VT);
+ SplitInteger(Result, Lo, Hi);
+ return;
+ }
+
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
TargetLowering::MakeLibCallOptions CallOptions;
@@ -4288,6 +4446,14 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
LC = RTLIB::UDIV_I64;
else if (VT == MVT::i128)
LC = RTLIB::UDIV_I128;
+
+ else {
+ SDValue Result =
+ ExpandExtIntRes_DIVREM(TLI, RTLIB::UDIV_IEXT, DAG, N, dl, VT);
+ SplitInteger(Result, Lo, Hi);
+ return;
+ }
+
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!");
TargetLowering::MakeLibCallOptions CallOptions;
@@ -4315,6 +4481,14 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
LC = RTLIB::UREM_I64;
else if (VT == MVT::i128)
LC = RTLIB::UREM_I128;
+
+ else {
+ SDValue Result =
+ ExpandExtIntRes_DIVREM(TLI, RTLIB::UREM_IEXT, DAG, N, dl, VT);
+ SplitInteger(Result, Lo, Hi);
+ return;
+ }
+
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!");
TargetLowering::MakeLibCallOptions CallOptions;
@@ -5060,7 +5234,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) {
return DAG.getBuildVector(NOutVT, dl, Ops);
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_ScalarOp(SDNode *N) {
SDLoc dl(N);
@@ -5070,35 +5244,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) {
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "This type must be promoted to a vector type");
- EVT NOutVTElem = NOutVT.getVectorElementType();
-
- SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(0));
-
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NOutVT, Op);
-}
-
-SDValue DAGTypeLegalizer::PromoteIntRes_SPLAT_VECTOR(SDNode *N) {
- SDLoc dl(N);
-
- SDValue SplatVal = N->getOperand(0);
-
- assert(!SplatVal.getValueType().isVector() && "Input must be a scalar");
-
- EVT OutVT = N->getValueType(0);
- EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
- assert(NOutVT.isVector() && "Type must be promoted to a vector type");
EVT NOutElemVT = NOutVT.getVectorElementType();
- SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutElemVT, SplatVal);
+ SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutElemVT, N->getOperand(0));
- return DAG.getNode(ISD::SPLAT_VECTOR, dl, NOutVT, Op);
+ return DAG.getNode(N->getOpcode(), dl, NOutVT, Op);
}
SDValue DAGTypeLegalizer::PromoteIntRes_STEP_VECTOR(SDNode *N) {
SDLoc dl(N);
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
- assert(NOutVT.isVector() && "Type must be promoted to a vector type");
+ assert(NOutVT.isScalableVector() &&
+ "Type must be promoted to a scalable vector type");
APInt StepVal = cast<ConstantSDNode>(N->getOperand(0))->getAPIntValue();
return DAG.getStepVector(dl, NOutVT,
StepVal.sext(NOutVT.getScalarSizeInBits()));
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 03dcd0f6d2c9..8fe9a83b9c3d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -13,10 +13,7 @@
//===----------------------------------------------------------------------===//
#include "LegalizeTypes.h"
-#include "SDNodeDbgValue.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
@@ -86,46 +83,49 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
auto ResId = ValueToIdMap.lookup(Res);
unsigned Mapped = 0;
- if (ResId && (ReplacedValues.find(ResId) != ReplacedValues.end())) {
- Mapped |= 1;
- // Check that remapped values are only used by nodes marked NewNode.
- for (SDNode::use_iterator UI = Node.use_begin(), UE = Node.use_end();
- UI != UE; ++UI)
- if (UI.getUse().getResNo() == i)
- assert(UI->getNodeId() == NewNode &&
- "Remapped value has non-trivial use!");
-
- // Check that the final result of applying ReplacedValues is not
- // marked NewNode.
- auto NewValId = ReplacedValues[ResId];
- auto I = ReplacedValues.find(NewValId);
- while (I != ReplacedValues.end()) {
- NewValId = I->second;
+ if (ResId) {
+ auto I = ReplacedValues.find(ResId);
+ if (I != ReplacedValues.end()) {
+ Mapped |= 1;
+ // Check that remapped values are only used by nodes marked NewNode.
+ for (SDNode::use_iterator UI = Node.use_begin(), UE = Node.use_end();
+ UI != UE; ++UI)
+ if (UI.getUse().getResNo() == i)
+ assert(UI->getNodeId() == NewNode &&
+ "Remapped value has non-trivial use!");
+
+ // Check that the final result of applying ReplacedValues is not
+ // marked NewNode.
+ auto NewValId = I->second;
I = ReplacedValues.find(NewValId);
+ while (I != ReplacedValues.end()) {
+ NewValId = I->second;
+ I = ReplacedValues.find(NewValId);
+ }
+ SDValue NewVal = getSDValue(NewValId);
+ (void)NewVal;
+ assert(NewVal.getNode()->getNodeId() != NewNode &&
+ "ReplacedValues maps to a new node!");
}
- SDValue NewVal = getSDValue(NewValId);
- (void)NewVal;
- assert(NewVal.getNode()->getNodeId() != NewNode &&
- "ReplacedValues maps to a new node!");
+ if (PromotedIntegers.count(ResId))
+ Mapped |= 2;
+ if (SoftenedFloats.count(ResId))
+ Mapped |= 4;
+ if (ScalarizedVectors.count(ResId))
+ Mapped |= 8;
+ if (ExpandedIntegers.count(ResId))
+ Mapped |= 16;
+ if (ExpandedFloats.count(ResId))
+ Mapped |= 32;
+ if (SplitVectors.count(ResId))
+ Mapped |= 64;
+ if (WidenedVectors.count(ResId))
+ Mapped |= 128;
+ if (PromotedFloats.count(ResId))
+ Mapped |= 256;
+ if (SoftPromotedHalfs.count(ResId))
+ Mapped |= 512;
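+      // Each bit of Mapped records membership in one map; the
+      // "Mapped & (Mapped - 1)" check below flags values present in
+      // several maps at once.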
}
- if (ResId && PromotedIntegers.find(ResId) != PromotedIntegers.end())
- Mapped |= 2;
- if (ResId && SoftenedFloats.find(ResId) != SoftenedFloats.end())
- Mapped |= 4;
- if (ResId && ScalarizedVectors.find(ResId) != ScalarizedVectors.end())
- Mapped |= 8;
- if (ResId && ExpandedIntegers.find(ResId) != ExpandedIntegers.end())
- Mapped |= 16;
- if (ResId && ExpandedFloats.find(ResId) != ExpandedFloats.end())
- Mapped |= 32;
- if (ResId && SplitVectors.find(ResId) != SplitVectors.end())
- Mapped |= 64;
- if (ResId && WidenedVectors.find(ResId) != WidenedVectors.end())
- Mapped |= 128;
- if (ResId && PromotedFloats.find(ResId) != PromotedFloats.end())
- Mapped |= 256;
- if (ResId && SoftPromotedHalfs.find(ResId) != SoftPromotedHalfs.end())
- Mapped |= 512;
if (Node.getNodeId() != Processed) {
// Since we allow ReplacedValues to map deleted nodes, it may map nodes
@@ -143,8 +143,16 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
}
} else {
if (Mapped == 0) {
- dbgs() << "Processed value not in any map!";
- Failed = true;
+ SDValue NodeById = IdToValueMap.lookup(ResId);
+ // It is possible the node has been remapped to another node and had
+ // its Id updated in the Value to Id table. The node it remapped to
+ // may not have been processed yet. Look up the Id in the Id to Value
+ // table and re-check the Processed state. If the node hasn't been
+ // remapped we'll get the same state as we got earlier.
+ if (NodeById->getNodeId() == Processed) {
+ dbgs() << "Processed value not in any map!";
+ Failed = true;
+ }
} else if (Mapped & (Mapped - 1)) {
dbgs() << "Value in multiple maps!";
Failed = true;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 4d8daa82d8c0..de320290bda9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -19,7 +19,6 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
namespace llvm {
@@ -309,8 +308,7 @@ private:
SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N);
SDValue PromoteIntRes_VECTOR_SPLICE(SDNode *N);
SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
- SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N);
- SDValue PromoteIntRes_SPLAT_VECTOR(SDNode *N);
+ SDValue PromoteIntRes_ScalarOp(SDNode *N);
SDValue PromoteIntRes_STEP_VECTOR(SDNode *N);
SDValue PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N);
SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N);
@@ -362,6 +360,7 @@ private:
SDValue PromoteIntRes_ABS(SDNode *N);
SDValue PromoteIntRes_Rotate(SDNode *N);
SDValue PromoteIntRes_FunnelShift(SDNode *N);
+ SDValue PromoteIntRes_IS_FPCLASS(SDNode *N);
// Integer Operand Promotion.
bool PromoteIntegerOperand(SDNode *N, unsigned OpNo);
@@ -377,12 +376,12 @@ private:
SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue PromoteIntOp_INSERT_SUBVECTOR(SDNode *N);
SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);
- SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N);
- SDValue PromoteIntOp_SPLAT_VECTOR(SDNode *N);
+ SDValue PromoteIntOp_ScalarOp(SDNode *N);
SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_Shift(SDNode *N);
+ SDValue PromoteIntOp_FunnelShift(SDNode *N);
SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N);
SDValue PromoteIntOp_SINT_TO_FP(SDNode *N);
SDValue PromoteIntOp_STRICT_SINT_TO_FP(SDNode *N);
@@ -784,6 +783,7 @@ private:
SDValue ScalarizeVecRes_UNDEF(SDNode *N);
SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N);
SDValue ScalarizeVecRes_FP_TO_XINT_SAT(SDNode *N);
+ SDValue ScalarizeVecRes_IS_FPCLASS(SDNode *N);
SDValue ScalarizeVecRes_FIX(SDNode *N);
@@ -850,6 +850,7 @@ private:
void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi);
@@ -960,6 +961,7 @@ private:
SDValue WidenVecRes_Convert_StrictFP(SDNode *N);
SDValue WidenVecRes_FP_TO_XINT_SAT(SDNode *N);
SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
+ SDValue WidenVecRes_IS_FPCLASS(SDNode *N);
SDValue WidenVecRes_POWI(SDNode *N);
SDValue WidenVecRes_Unary(SDNode *N);
SDValue WidenVecRes_InregOp(SDNode *N);
@@ -985,6 +987,7 @@ private:
SDValue WidenVecOp_Convert(SDNode *N);
SDValue WidenVecOp_FP_TO_XINT_SAT(SDNode *N);
SDValue WidenVecOp_FCOPYSIGN(SDNode *N);
+ SDValue WidenVecOp_IS_FPCLASS(SDNode *N);
SDValue WidenVecOp_VECREDUCE(SDNode *N);
SDValue WidenVecOp_VECREDUCE_SEQ(SDNode *N);
SDValue WidenVecOp_VP_REDUCE(SDNode *N);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index abf6a3ac6916..842ffa2aa23e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -26,11 +26,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -41,7 +39,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
-#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <iterator>
@@ -464,6 +461,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::VPID: { \
EVT LegalizeVT = LEGALPOS < 0 ? Node->getValueType(-(1 + LEGALPOS)) \
: Node->getOperand(LEGALPOS).getValueType(); \
+ if (ISD::VPID == ISD::VP_SETCC) { \
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get(); \
+ Action = TLI.getCondCodeAction(CCCode, LegalizeVT.getSimpleVT()); \
+ if (Action != TargetLowering::Legal) \
+ break; \
+ } \
Action = TLI.getOperationAction(Node->getOpcode(), LegalizeVT); \
} break;
#include "llvm/IR/VPIntrinsics.def"
@@ -747,6 +750,7 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
ExpandFSUB(Node, Results);
return;
case ISD::SETCC:
+ case ISD::VP_SETCC:
ExpandSETCC(Node, Results);
return;
case ISD::ABS:
@@ -1050,10 +1054,7 @@ SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) {
// Shuffle the incoming lanes into the correct position, and pull all other
// lanes from the zero vector.
- SmallVector<int, 16> ShuffleMask;
- ShuffleMask.reserve(NumSrcElements);
- for (int i = 0; i < NumSrcElements; ++i)
- ShuffleMask.push_back(i);
+ auto ShuffleMask = llvm::to_vector<16>(llvm::seq<int>(0, NumSrcElements));
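+  // llvm::seq<int>(0, N) enumerates 0..N-1, so this builds the same
+  // identity mask as the removed loop.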
int ExtLaneScale = NumSrcElements / NumElements;
int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
@@ -1423,6 +1424,7 @@ void VectorLegalizer::ExpandFSUB(SDNode *Node,
void VectorLegalizer::ExpandSETCC(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
bool NeedInvert = false;
+ bool IsVP = Node->getOpcode() == ISD::VP_SETCC;
SDLoc dl(Node);
MVT OpVT = Node->getOperand(0).getSimpleValueType();
ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
@@ -1436,20 +1438,36 @@ void VectorLegalizer::ExpandSETCC(SDNode *Node,
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
SDValue CC = Node->getOperand(2);
- bool Legalized = TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), LHS,
- RHS, CC, NeedInvert, dl, Chain);
+ SDValue Mask, EVL;
+ if (IsVP) {
+ Mask = Node->getOperand(3);
+ EVL = Node->getOperand(4);
+ }
+
+ bool Legalized =
+ TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), LHS, RHS, CC, Mask,
+ EVL, NeedInvert, dl, Chain);
if (Legalized) {
// If we expanded the SETCC by swapping LHS and RHS, or by inverting the
// condition code, create a new SETCC node.
- if (CC.getNode())
- LHS = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), LHS, RHS, CC,
- Node->getFlags());
+ if (CC.getNode()) {
+ if (!IsVP)
+ LHS = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), LHS, RHS, CC,
+ Node->getFlags());
+ else
+ LHS = DAG.getNode(ISD::VP_SETCC, dl, Node->getValueType(0),
+ {LHS, RHS, CC, Mask, EVL}, Node->getFlags());
+ }
// If we expanded the SETCC by inverting the condition code, then wrap
// the existing SETCC in a NOT to restore the intended condition.
- if (NeedInvert)
- LHS = DAG.getLogicalNOT(dl, LHS, LHS->getValueType(0));
+ if (NeedInvert) {
+ if (!IsVP)
+ LHS = DAG.getLogicalNOT(dl, LHS, LHS->getValueType(0));
+ else
+ LHS = DAG.getVPLogicalNOT(dl, LHS, Mask, EVL, LHS->getValueType(0));
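+        // getVPLogicalNOT is conceptually a predicated XOR with all-ones
+        // under the same Mask/EVL, leaving inactive lanes untouched.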
+ }
} else {
// Otherwise, SETCC for the given comparison type must be completely
// illegal; expand it into a SELECT_CC.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 0bd44ce4c872..fa555be00ded 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -20,7 +20,9 @@
//===----------------------------------------------------------------------===//
#include "LegalizeTypes.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TypeSize.h"
@@ -64,6 +66,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break;
case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break;
case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
+ case ISD::IS_FPCLASS: R = ScalarizeVecRes_IS_FPCLASS(N); break;
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG:
@@ -231,9 +234,16 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) {
// Now process the remaining operands.
for (unsigned i = 1; i < NumOpers; ++i) {
SDValue Oper = N->getOperand(i);
+ EVT OperVT = Oper.getValueType();
- if (Oper.getValueType().isVector())
- Oper = GetScalarizedVector(Oper);
+ if (OperVT.isVector()) {
+ if (getTypeAction(OperVT) == TargetLowering::TypeScalarizeVector)
+ Oper = GetScalarizedVector(Oper);
+ else
+ Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ OperVT.getVectorElementType(), Oper,
+ DAG.getVectorIdxConstant(0, dl));
+ }
Opers[i] = Oper;
}
@@ -582,6 +592,29 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {
return DAG.getNode(ExtendCode, DL, NVT, Res);
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_IS_FPCLASS(SDNode *N) {
+ SDLoc DL(N);
+ SDValue Arg = N->getOperand(0);
+ SDValue Test = N->getOperand(1);
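+  // Test is a constant bitmask selecting which floating-point classes to
+  // check for (NaN, infinity, zero, ...); it applies uniformly to each lane.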
+ EVT ArgVT = Arg.getValueType();
+ EVT ResultVT = N->getValueType(0).getVectorElementType();
+
+ if (getTypeAction(ArgVT) == TargetLowering::TypeScalarizeVector) {
+ Arg = GetScalarizedVector(Arg);
+ } else {
+ EVT VT = ArgVT.getVectorElementType();
+ Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Arg,
+ DAG.getVectorIdxConstant(0, DL));
+ }
+
+ SDValue Res =
+ DAG.getNode(ISD::IS_FPCLASS, DL, MVT::i1, {Arg, Test}, N->getFlags());
+  // Vectors may have different boolean contents than scalars. Promote the
+  // value appropriately.
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(ArgVT));
+ return DAG.getNode(ExtendCode, DL, ResultVT, Res);
+}
//===----------------------------------------------------------------------===//
// Operand Vector Scalarization <1 x ty> -> ty.
@@ -926,6 +959,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break;
case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break;
+ case ISD::IS_FPCLASS: SplitVecRes_IS_FPCLASS(N, Lo, Hi); break;
case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
case ISD::SPLAT_VECTOR:
case ISD::SCALAR_TO_VECTOR:
@@ -949,6 +983,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
SplitVecRes_Gather(cast<MemSDNode>(N), Lo, Hi, /*SplitSETCC*/ true);
break;
case ISD::SETCC:
+ case ISD::VP_SETCC:
SplitVecRes_SETCC(N, Lo, Hi);
break;
case ISD::VECTOR_REVERSE:
@@ -988,13 +1023,17 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FLOG10:
case ISD::FLOG2:
case ISD::FNEARBYINT:
- case ISD::FNEG:
+ case ISD::FNEG: case ISD::VP_FNEG:
case ISD::FREEZE:
case ISD::ARITH_FENCE:
case ISD::FP_EXTEND:
+ case ISD::VP_FP_EXTEND:
case ISD::FP_ROUND:
+ case ISD::VP_FP_ROUND:
case ISD::FP_TO_SINT:
+ case ISD::VP_FPTOSI:
case ISD::FP_TO_UINT:
+ case ISD::VP_FPTOUI:
case ISD::FRINT:
case ISD::FROUND:
case ISD::FROUNDEVEN:
@@ -1002,8 +1041,11 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FSQRT:
case ISD::FTRUNC:
case ISD::SINT_TO_FP:
+ case ISD::VP_SITOFP:
case ISD::TRUNCATE:
+ case ISD::VP_TRUNCATE:
case ISD::UINT_TO_FP:
+ case ISD::VP_UITOFP:
case ISD::FCANONICALIZE:
SplitVecRes_UnaryOp(N, Lo, Hi);
break;
@@ -1011,6 +1053,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::ANY_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
+ case ISD::VP_SIGN_EXTEND:
+ case ISD::VP_ZERO_EXTEND:
SplitVecRes_ExtendOp(N, Lo, Hi);
break;
@@ -1053,7 +1097,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::ROTR:
SplitVecRes_BinOp(N, Lo, Hi);
break;
- case ISD::FMA:
+ case ISD::FMA: case ISD::VP_FMA:
case ISD::FSHL:
case ISD::FSHR:
SplitVecRes_TernaryOp(N, Lo, Hi);
@@ -1175,10 +1219,28 @@ void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi);
SDLoc dl(N);
- Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(), Op0Lo, Op1Lo,
- Op2Lo, N->getFlags());
- Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(), Op0Hi, Op1Hi,
- Op2Hi, N->getFlags());
+ const SDNodeFlags Flags = N->getFlags();
+ unsigned Opcode = N->getOpcode();
+ if (N->getNumOperands() == 3) {
+ Lo = DAG.getNode(Opcode, dl, Op0Lo.getValueType(), Op0Lo, Op1Lo, Op2Lo, Flags);
+ Hi = DAG.getNode(Opcode, dl, Op0Hi.getValueType(), Op0Hi, Op1Hi, Op2Hi, Flags);
+ return;
+ }
+
+ assert(N->getNumOperands() == 5 && "Unexpected number of operands!");
+ assert(N->isVPOpcode() && "Expected VP opcode");
+
+ SDValue MaskLo, MaskHi;
+ std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(3));
+
+ SDValue EVLLo, EVLHi;
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(N->getOperand(4), N->getValueType(0), dl);
+
+ Lo = DAG.getNode(Opcode, dl, Op0Lo.getValueType(),
+ {Op0Lo, Op1Lo, Op2Lo, MaskLo, EVLLo}, Flags);
+ Hi = DAG.getNode(Opcode, dl, Op0Hi.getValueType(),
+ {Op0Hi, Op1Hi, Op2Hi, MaskHi, EVLHi}, Flags);
}
void DAGTypeLegalizer::SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi) {
@@ -1398,6 +1460,19 @@ void DAGTypeLegalizer::SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(ISD::FCOPYSIGN, DL, LHSHi.getValueType(), LHSHi, RHSHi);
}
+void DAGTypeLegalizer::SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc DL(N);
+ SDValue ArgLo, ArgHi;
+ SDValue Test = N->getOperand(1);
+ GetSplitVector(N->getOperand(0), ArgLo, ArgHi);
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ Lo = DAG.getNode(ISD::IS_FPCLASS, DL, LoVT, ArgLo, Test, N->getFlags());
+ Hi = DAG.getNode(ISD::IS_FPCLASS, DL, HiVT, ArgHi, Test, N->getFlags());
+}
+
void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue LHSLo, LHSHi;
@@ -2043,8 +2118,20 @@ void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
else
std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
- Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
- Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
+ if (N->getOpcode() == ISD::SETCC) {
+ Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
+ Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
+ } else {
+ assert(N->getOpcode() == ISD::VP_SETCC && "Expected VP_SETCC opcode");
+ SDValue MaskLo, MaskHi, EVLLo, EVLHi;
+ std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(3));
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(N->getOperand(4), N->getValueType(0), DL);
+ Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2), MaskLo,
+ EVLLo);
+ Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2), MaskHi,
+ EVLHi);
+ }
}
void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
@@ -2056,22 +2143,37 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
// If the input also splits, handle it directly for a compile time speedup.
// Otherwise split it by hand.
- unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
- EVT InVT = N->getOperand(OpNo).getValueType();
+ EVT InVT = N->getOperand(0).getValueType();
if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
- GetSplitVector(N->getOperand(OpNo), Lo, Hi);
+ GetSplitVector(N->getOperand(0), Lo, Hi);
else
- std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, OpNo);
+ std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
- if (N->getOpcode() == ISD::FP_ROUND) {
- Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1),
- N->getFlags());
- Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1),
- N->getFlags());
- } else {
- Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getFlags());
- Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getFlags());
+ const SDNodeFlags Flags = N->getFlags();
+ unsigned Opcode = N->getOpcode();
+ if (N->getNumOperands() <= 2) {
+ if (Opcode == ISD::FP_ROUND) {
+ Lo = DAG.getNode(Opcode, dl, LoVT, Lo, N->getOperand(1), Flags);
+ Hi = DAG.getNode(Opcode, dl, HiVT, Hi, N->getOperand(1), Flags);
+ } else {
+ Lo = DAG.getNode(Opcode, dl, LoVT, Lo, Flags);
+ Hi = DAG.getNode(Opcode, dl, HiVT, Hi, Flags);
+ }
+ return;
}
+
+ assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
+ assert(N->isVPOpcode() && "Expected VP opcode");
+
+ SDValue MaskLo, MaskHi;
+ std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1));
+
+ SDValue EVLLo, EVLHi;
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl);
+
+ Lo = DAG.getNode(Opcode, dl, LoVT, {Lo, MaskLo, EVLLo}, Flags);
+ Hi = DAG.getNode(Opcode, dl, HiVT, {Hi, MaskHi, EVLHi}, Flags);
}
void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo,
@@ -2107,14 +2209,34 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo,
TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) {
LLVM_DEBUG(dbgs() << "Split vector extend via incremental extend:";
N->dump(&DAG); dbgs() << "\n");
+ if (!N->isVPOpcode()) {
+ // Extend the source vector by one step.
+ SDValue NewSrc =
+ DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0));
+ // Get the low and high halves of the new, extended one step, vector.
+ std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl);
+ // Extend those vector halves the rest of the way.
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
+ return;
+ }
+
// Extend the source vector by one step.
SDValue NewSrc =
- DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0));
+ DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0),
+ N->getOperand(1), N->getOperand(2));
// Get the low and high halves of the new, extended one step, vector.
std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl);
+
+ SDValue MaskLo, MaskHi;
+ std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1));
+
+ SDValue EVLLo, EVLHi;
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl);
// Extend those vector halves the rest of the way.
- Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
- Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, {Lo, MaskLo, EVLLo});
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, {Hi, MaskHi, EVLHi});
return;
}
}
@@ -2126,108 +2248,352 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
SDValue &Lo, SDValue &Hi) {
// The low and high parts of the original input give four input vectors.
SDValue Inputs[4];
- SDLoc dl(N);
+ SDLoc DL(N);
GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]);
GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]);
EVT NewVT = Inputs[0].getValueType();
unsigned NewElts = NewVT.getVectorNumElements();
+ auto &&IsConstant = [](const SDValue &N) {
+ APInt SplatValue;
+ return N.getResNo() == 0 &&
+ (ISD::isConstantSplatVector(N.getNode(), SplatValue) ||
+ ISD::isBuildVectorOfConstantSDNodes(N.getNode()));
+ };
+ auto &&BuildVector = [NewElts, &DAG = DAG, NewVT, &DL](SDValue &Input1,
+ SDValue &Input2,
+ ArrayRef<int> Mask) {
+ assert(Input1->getOpcode() == ISD::BUILD_VECTOR &&
+ Input2->getOpcode() == ISD::BUILD_VECTOR &&
+ "Expected build vector node.");
+ EVT EltVT = NewVT.getVectorElementType();
+ SmallVector<SDValue> Ops(NewElts, DAG.getUNDEF(EltVT));
+ for (unsigned I = 0; I < NewElts; ++I) {
+ if (Mask[I] == UndefMaskElem)
+ continue;
+ unsigned Idx = Mask[I];
+ if (Idx >= NewElts)
+ Ops[I] = Input2.getOperand(Idx - NewElts);
+ else
+ Ops[I] = Input1.getOperand(Idx);
+ // Make the type of all elements the same as the element type.
+ if (Ops[I].getValueType().bitsGT(EltVT))
+ Ops[I] = DAG.getNode(ISD::TRUNCATE, DL, EltVT, Ops[I]);
+ }
+ return DAG.getBuildVector(NewVT, DL, Ops);
+ };
+
// If Lo or Hi uses elements from at most two of the four input vectors, then
// express it as a vector shuffle of those two inputs. Otherwise extract the
// input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
- SmallVector<int, 16> Ops;
- for (unsigned High = 0; High < 2; ++High) {
- SDValue &Output = High ? Hi : Lo;
-
- // Build a shuffle mask for the output, discovering on the fly which
- // input vectors to use as shuffle operands (recorded in InputUsed).
- // If building a suitable shuffle vector proves too hard, then bail
- // out with useBuildVector set.
- unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered.
- unsigned FirstMaskIdx = High * NewElts;
- bool useBuildVector = false;
- for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
- // The mask element. This indexes into the input.
- int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
-
- // The input vector this mask element indexes into.
- unsigned Input = (unsigned)Idx / NewElts;
-
- if (Input >= array_lengthof(Inputs)) {
- // The mask element does not index into any input vector.
- Ops.push_back(-1);
+ SmallVector<int> OrigMask(N->getMask().begin(), N->getMask().end());
+ // Try to pack incoming shuffles/inputs.
+ auto &&TryPeekThroughShufflesInputs = [&Inputs, &NewVT, this, NewElts,
+ &DL](SmallVectorImpl<int> &Mask) {
+ // Check if all inputs are shuffles of the same operands or non-shuffles.
+ MapVector<std::pair<SDValue, SDValue>, SmallVector<unsigned>> ShufflesIdxs;
+ for (unsigned Idx = 0; Idx < array_lengthof(Inputs); ++Idx) {
+ SDValue Input = Inputs[Idx];
+ auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Input.getNode());
+ if (!Shuffle ||
+ Input.getOperand(0).getValueType() != Input.getValueType())
+ continue;
+ ShufflesIdxs[std::make_pair(Input.getOperand(0), Input.getOperand(1))]
+ .push_back(Idx);
+ ShufflesIdxs[std::make_pair(Input.getOperand(1), Input.getOperand(0))]
+ .push_back(Idx);
+ }
+ for (auto &P : ShufflesIdxs) {
+ if (P.second.size() < 2)
continue;
+      // Use the shuffles' operands instead of the shuffles themselves.
+ // 1. Adjust mask.
+ for (int &Idx : Mask) {
+ if (Idx == UndefMaskElem)
+ continue;
+ unsigned SrcRegIdx = Idx / NewElts;
+ if (Inputs[SrcRegIdx].isUndef()) {
+ Idx = UndefMaskElem;
+ continue;
+ }
+ auto *Shuffle =
+ dyn_cast<ShuffleVectorSDNode>(Inputs[SrcRegIdx].getNode());
+ if (!Shuffle || !is_contained(P.second, SrcRegIdx))
+ continue;
+ int MaskElt = Shuffle->getMaskElt(Idx % NewElts);
+ if (MaskElt == UndefMaskElem) {
+ Idx = UndefMaskElem;
+ continue;
+ }
+ Idx = MaskElt % NewElts +
+ P.second[Shuffle->getOperand(MaskElt / NewElts) == P.first.first
+ ? 0
+ : 1] *
+ NewElts;
}
-
- // Turn the index into an offset from the start of the input vector.
- Idx -= Input * NewElts;
-
- // Find or create a shuffle vector operand to hold this input.
- unsigned OpNo;
- for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
- if (InputUsed[OpNo] == Input) {
- // This input vector is already an operand.
- break;
- } else if (InputUsed[OpNo] == -1U) {
- // Create a new operand for this input vector.
- InputUsed[OpNo] = Input;
- break;
+ // 2. Update inputs.
+ Inputs[P.second[0]] = P.first.first;
+ Inputs[P.second[1]] = P.first.second;
+ // Clear the pair data.
+ P.second.clear();
+ ShufflesIdxs[std::make_pair(P.first.second, P.first.first)].clear();
+ }
+ // Check if any concat_vectors can be simplified.
+ SmallBitVector UsedSubVector(2 * array_lengthof(Inputs));
+ for (int &Idx : Mask) {
+ if (Idx == UndefMaskElem)
+ continue;
+ unsigned SrcRegIdx = Idx / NewElts;
+ if (Inputs[SrcRegIdx].isUndef()) {
+ Idx = UndefMaskElem;
+ continue;
+ }
+ TargetLowering::LegalizeTypeAction TypeAction =
+ getTypeAction(Inputs[SrcRegIdx].getValueType());
+ if (Inputs[SrcRegIdx].getOpcode() == ISD::CONCAT_VECTORS &&
+ Inputs[SrcRegIdx].getNumOperands() == 2 &&
+ !Inputs[SrcRegIdx].getOperand(1).isUndef() &&
+ (TypeAction == TargetLowering::TypeLegal ||
+ TypeAction == TargetLowering::TypeWidenVector))
+ UsedSubVector.set(2 * SrcRegIdx + (Idx % NewElts) / (NewElts / 2));
+ }
+ if (UsedSubVector.count() > 1) {
+ SmallVector<SmallVector<std::pair<unsigned, int>, 2>> Pairs;
+ for (unsigned I = 0; I < array_lengthof(Inputs); ++I) {
+ if (UsedSubVector.test(2 * I) == UsedSubVector.test(2 * I + 1))
+ continue;
+ if (Pairs.empty() || Pairs.back().size() == 2)
+ Pairs.emplace_back();
+ if (UsedSubVector.test(2 * I)) {
+ Pairs.back().emplace_back(I, 0);
+ } else {
+ assert(UsedSubVector.test(2 * I + 1) &&
+ "Expected to be used one of the subvectors.");
+ Pairs.back().emplace_back(I, 1);
}
}
-
- if (OpNo >= array_lengthof(InputUsed)) {
- // More than two input vectors used! Give up on trying to create a
- // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
- useBuildVector = true;
- break;
+ if (!Pairs.empty() && Pairs.front().size() > 1) {
+ // Adjust mask.
+ for (int &Idx : Mask) {
+ if (Idx == UndefMaskElem)
+ continue;
+ unsigned SrcRegIdx = Idx / NewElts;
+ auto *It = find_if(
+ Pairs, [SrcRegIdx](ArrayRef<std::pair<unsigned, int>> Idxs) {
+ return Idxs.front().first == SrcRegIdx ||
+ Idxs.back().first == SrcRegIdx;
+ });
+ if (It == Pairs.end())
+ continue;
+ Idx = It->front().first * NewElts + (Idx % NewElts) % (NewElts / 2) +
+ (SrcRegIdx == It->front().first ? 0 : (NewElts / 2));
+ }
+ // Adjust inputs.
+ for (ArrayRef<std::pair<unsigned, int>> Idxs : Pairs) {
+ Inputs[Idxs.front().first] = DAG.getNode(
+ ISD::CONCAT_VECTORS, DL,
+ Inputs[Idxs.front().first].getValueType(),
+ Inputs[Idxs.front().first].getOperand(Idxs.front().second),
+ Inputs[Idxs.back().first].getOperand(Idxs.back().second));
+ }
}
-
- // Add the mask index for the new shuffle vector.
- Ops.push_back(Idx + OpNo * NewElts);
}
-
- if (useBuildVector) {
- EVT EltVT = NewVT.getVectorElementType();
- SmallVector<SDValue, 16> SVOps;
-
- // Extract the input elements by hand.
- for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
- // The mask element. This indexes into the input.
- int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
-
- // The input vector this mask element indexes into.
- unsigned Input = (unsigned)Idx / NewElts;
-
- if (Input >= array_lengthof(Inputs)) {
- // The mask element is "undef" or indexes off the end of the input.
- SVOps.push_back(DAG.getUNDEF(EltVT));
+ bool Changed;
+ do {
+    // Try to remove extra shuffles (except broadcasts) and shuffles with
+    // reused operands.
+ Changed = false;
+ for (unsigned I = 0; I < array_lengthof(Inputs); ++I) {
+ auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Inputs[I].getNode());
+ if (!Shuffle)
continue;
+ if (Shuffle->getOperand(0).getValueType() != NewVT)
+ continue;
+ int Op = -1;
+ if (!Inputs[I].hasOneUse() && Shuffle->getOperand(1).isUndef() &&
+ !Shuffle->isSplat()) {
+ Op = 0;
+ } else if (!Inputs[I].hasOneUse() &&
+ !Shuffle->getOperand(1).isUndef()) {
+ // Find the only used operand, if possible.
+ for (int &Idx : Mask) {
+ if (Idx == UndefMaskElem)
+ continue;
+ unsigned SrcRegIdx = Idx / NewElts;
+ if (SrcRegIdx != I)
+ continue;
+ int MaskElt = Shuffle->getMaskElt(Idx % NewElts);
+ if (MaskElt == UndefMaskElem) {
+ Idx = UndefMaskElem;
+ continue;
+ }
+ int OpIdx = MaskElt / NewElts;
+ if (Op == -1) {
+ Op = OpIdx;
+ continue;
+ }
+ if (Op != OpIdx) {
+ Op = -1;
+ break;
+ }
+ }
+ }
+ if (Op < 0) {
+        // Check whether one of the shuffle operands is already used.
+ for (int OpIdx = 0; OpIdx < 2; ++OpIdx) {
+ if (Shuffle->getOperand(OpIdx).isUndef())
+ continue;
+ auto *It = find(Inputs, Shuffle->getOperand(OpIdx));
+ if (It == std::end(Inputs))
+ continue;
+ int FoundOp = std::distance(std::begin(Inputs), It);
+          // Found that this operand is already used.
+ // 1. Fix the mask for the reused operand.
+ for (int &Idx : Mask) {
+ if (Idx == UndefMaskElem)
+ continue;
+ unsigned SrcRegIdx = Idx / NewElts;
+ if (SrcRegIdx != I)
+ continue;
+ int MaskElt = Shuffle->getMaskElt(Idx % NewElts);
+ if (MaskElt == UndefMaskElem) {
+ Idx = UndefMaskElem;
+ continue;
+ }
+ int MaskIdx = MaskElt / NewElts;
+ if (OpIdx == MaskIdx)
+ Idx = MaskElt % NewElts + FoundOp * NewElts;
+ }
+ // 2. Set Op to the unused OpIdx.
+ Op = (OpIdx + 1) % 2;
+ break;
+ }
+ }
+ if (Op >= 0) {
+ Changed = true;
+ Inputs[I] = Shuffle->getOperand(Op);
+ // Adjust mask.
+ for (int &Idx : Mask) {
+ if (Idx == UndefMaskElem)
+ continue;
+ unsigned SrcRegIdx = Idx / NewElts;
+ if (SrcRegIdx != I)
+ continue;
+ int MaskElt = Shuffle->getMaskElt(Idx % NewElts);
+ int OpIdx = MaskElt / NewElts;
+ if (OpIdx != Op)
+ continue;
+ Idx = MaskElt % NewElts + SrcRegIdx * NewElts;
+ }
}
-
- // Turn the index into an offset from the start of the input vector.
- Idx -= Input * NewElts;
-
- // Extract the vector element by hand.
- SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
- Inputs[Input],
- DAG.getVectorIdxConstant(Idx, dl)));
}
-
- // Construct the Lo/Hi output using a BUILD_VECTOR.
- Output = DAG.getBuildVector(NewVT, dl, SVOps);
- } else if (InputUsed[0] == -1U) {
- // No input vectors were used! The result is undefined.
- Output = DAG.getUNDEF(NewVT);
- } else {
- SDValue Op0 = Inputs[InputUsed[0]];
- // If only one input was used, use an undefined vector for the other.
- SDValue Op1 = InputUsed[1] == -1U ?
- DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]];
- // At least one input vector was used. Create a new shuffle vector.
- Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, Ops);
+ } while (Changed);
+ };
+ TryPeekThroughShufflesInputs(OrigMask);
+  // Process unique inputs.
+ auto &&MakeUniqueInputs = [&Inputs, &IsConstant,
+ NewElts](SmallVectorImpl<int> &Mask) {
+ SetVector<SDValue> UniqueInputs;
+ SetVector<SDValue> UniqueConstantInputs;
+ for (unsigned I = 0; I < array_lengthof(Inputs); ++I) {
+ if (IsConstant(Inputs[I]))
+ UniqueConstantInputs.insert(Inputs[I]);
+ else if (!Inputs[I].isUndef())
+ UniqueInputs.insert(Inputs[I]);
+ }
+    // Adjust the mask in case of reused inputs. Also, constant inputs need
+    // to be inserted first, otherwise they affect the final outcome.
+ if (UniqueInputs.size() != array_lengthof(Inputs)) {
+ auto &&UniqueVec = UniqueInputs.takeVector();
+ auto &&UniqueConstantVec = UniqueConstantInputs.takeVector();
+ unsigned ConstNum = UniqueConstantVec.size();
+ for (int &Idx : Mask) {
+ if (Idx == UndefMaskElem)
+ continue;
+ unsigned SrcRegIdx = Idx / NewElts;
+ if (Inputs[SrcRegIdx].isUndef()) {
+ Idx = UndefMaskElem;
+ continue;
+ }
+ const auto It = find(UniqueConstantVec, Inputs[SrcRegIdx]);
+ if (It != UniqueConstantVec.end()) {
+ Idx = (Idx % NewElts) +
+ NewElts * std::distance(UniqueConstantVec.begin(), It);
+ assert(Idx >= 0 && "Expected defined mask idx.");
+ continue;
+ }
+ const auto RegIt = find(UniqueVec, Inputs[SrcRegIdx]);
+ assert(RegIt != UniqueVec.end() && "Cannot find non-const value.");
+ Idx = (Idx % NewElts) +
+ NewElts * (std::distance(UniqueVec.begin(), RegIt) + ConstNum);
+ assert(Idx >= 0 && "Expected defined mask idx.");
+ }
+ copy(UniqueConstantVec, std::begin(Inputs));
+ copy(UniqueVec, std::next(std::begin(Inputs), ConstNum));
}
+ };
+ MakeUniqueInputs(OrigMask);
+ SDValue OrigInputs[4];
+ copy(Inputs, std::begin(OrigInputs));
+ for (unsigned High = 0; High < 2; ++High) {
+ SDValue &Output = High ? Hi : Lo;
- Ops.clear();
+ // Build a shuffle mask for the output, discovering on the fly which
+ // input vectors to use as shuffle operands.
+ unsigned FirstMaskIdx = High * NewElts;
+ SmallVector<int> Mask(NewElts * array_lengthof(Inputs), UndefMaskElem);
+ copy(makeArrayRef(OrigMask).slice(FirstMaskIdx, NewElts), Mask.begin());
+ assert(!Output && "Expected default initialized initial value.");
+ TryPeekThroughShufflesInputs(Mask);
+ MakeUniqueInputs(Mask);
+ SDValue TmpInputs[4];
+ copy(Inputs, std::begin(TmpInputs));
+ // Track changes in the output registers.
+ int UsedIdx = -1;
+ bool SecondIteration = false;
+ auto &&AccumulateResults = [&UsedIdx, &SecondIteration](unsigned Idx) {
+ if (UsedIdx < 0) {
+ UsedIdx = Idx;
+ return false;
+ }
+ if (UsedIdx >= 0 && static_cast<unsigned>(UsedIdx) == Idx)
+ SecondIteration = true;
+ return SecondIteration;
+ };
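+    // processShuffleMasks decomposes the combined mask into per-register
+    // shuffles; the three callbacks below handle an all-undef output, a
+    // single-input shuffle, and a two-input shuffle, respectively.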
+ processShuffleMasks(
+ Mask, array_lengthof(Inputs), array_lengthof(Inputs),
+ /*NumOfUsedRegs=*/1,
+ [&Output, &DAG = DAG, NewVT]() { Output = DAG.getUNDEF(NewVT); },
+ [&Output, &DAG = DAG, NewVT, &DL, &Inputs,
+ &BuildVector](ArrayRef<int> Mask, unsigned Idx, unsigned /*Unused*/) {
+ if (Inputs[Idx]->getOpcode() == ISD::BUILD_VECTOR)
+ Output = BuildVector(Inputs[Idx], Inputs[Idx], Mask);
+ else
+ Output = DAG.getVectorShuffle(NewVT, DL, Inputs[Idx],
+ DAG.getUNDEF(NewVT), Mask);
+ Inputs[Idx] = Output;
+ },
+ [&AccumulateResults, &Output, &DAG = DAG, NewVT, &DL, &Inputs,
+ &TmpInputs,
+ &BuildVector](ArrayRef<int> Mask, unsigned Idx1, unsigned Idx2) {
+ if (AccumulateResults(Idx1)) {
+ if (Inputs[Idx1]->getOpcode() == ISD::BUILD_VECTOR &&
+ Inputs[Idx2]->getOpcode() == ISD::BUILD_VECTOR)
+ Output = BuildVector(Inputs[Idx1], Inputs[Idx2], Mask);
+ else
+ Output = DAG.getVectorShuffle(NewVT, DL, Inputs[Idx1],
+ Inputs[Idx2], Mask);
+ } else {
+ if (TmpInputs[Idx1]->getOpcode() == ISD::BUILD_VECTOR &&
+ TmpInputs[Idx2]->getOpcode() == ISD::BUILD_VECTOR)
+ Output = BuildVector(TmpInputs[Idx1], TmpInputs[Idx2], Mask);
+ else
+ Output = DAG.getVectorShuffle(NewVT, DL, TmpInputs[Idx1],
+ TmpInputs[Idx2], Mask);
+ }
+ Inputs[Idx1] = Output;
+ });
+ copy(OrigInputs, std::begin(Inputs));
}
}
@@ -2268,6 +2634,32 @@ void DAGTypeLegalizer::SplitVecRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(N->getOpcode(), dl, DstVTHi, SrcHi, N->getOperand(1));
}
+void DAGTypeLegalizer::SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue InLo, InHi;
+ GetSplitVector(N->getOperand(0), InLo, InHi);
+ SDLoc DL(N);
+
+ Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, InHi.getValueType(), InHi);
+ Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, InLo.getValueType(), InLo);
+}
+
+void DAGTypeLegalizer::SplitVecRes_VECTOR_SPLICE(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
+
+ SDValue Expanded = TLI.expandVectorSplice(N, DAG);
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, Expanded,
+ DAG.getVectorIdxConstant(0, DL));
+ Hi =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, Expanded,
+ DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
+}
+
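Editorial sketch: SplitVecRes_VECTOR_SPLICE above computes the full spliced value once and then carves it into halves with EXTRACT_SUBVECTOR at indices 0 and LoVT's element count. A hedged standalone model of that flow; the splice helper below merely stands in for TLI.expandVectorSplice:

    #include <cassert>
    #include <utility>
    #include <vector>

    using Vec = std::vector<int>;

    // vector_splice(A, B, Ofs): concat(A, B) shifted left by Ofs, |A| lanes kept.
    static Vec splice(const Vec &A, const Vec &B, size_t Ofs) {
      Vec Cat(A);
      Cat.insert(Cat.end(), B.begin(), B.end());
      return Vec(Cat.begin() + Ofs, Cat.begin() + Ofs + A.size());
    }

    static std::pair<Vec, Vec> splitSpliceResult(const Vec &A, const Vec &B,
                                                 size_t Ofs) {
      Vec Expanded = splice(A, B, Ofs);  // TLI.expandVectorSplice analogue
      size_t Half = Expanded.size() / 2; // LoVT element count
      return {Vec(Expanded.begin(), Expanded.begin() + Half),  // extract at 0
              Vec(Expanded.begin() + Half, Expanded.end())};   // extract at Half
    }

    int main() {
      auto [Lo, Hi] = splitSpliceResult({0, 1, 2, 3}, {4, 5, 6, 7}, 2);
      assert((Lo == Vec{2, 3}) && (Hi == Vec{4, 5}));
      return 0;
    }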
//===----------------------------------------------------------------------===//
// Operand Vector Splitting
//===----------------------------------------------------------------------===//
@@ -2294,16 +2686,19 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
report_fatal_error("Do not know how to split this operator's "
"operand!\n");
+ case ISD::VP_SETCC:
case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break;
case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::INSERT_SUBVECTOR: Res = SplitVecOp_INSERT_SUBVECTOR(N, OpNo); break;
case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
+ case ISD::VP_TRUNCATE:
case ISD::TRUNCATE:
Res = SplitVecOp_TruncateHelper(N);
break;
case ISD::STRICT_FP_ROUND:
+ case ISD::VP_FP_ROUND:
case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break;
case ISD::STORE:
@@ -2543,6 +2938,14 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Ch);
+ } else if (N->getNumOperands() == 3) {
+ assert(N->isVPOpcode() && "Expected VP opcode");
+ SDValue MaskLo, MaskHi, EVLLo, EVLHi;
+ std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1));
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl);
+ Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo, MaskLo, EVLLo);
+ Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi, MaskHi, EVLHi);
} else {
Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo);
Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi);
@@ -3128,8 +3531,20 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
EVT PartResVT = EVT::getVectorVT(Context, MVT::i1, PartEltCnt);
EVT WideResVT = EVT::getVectorVT(Context, MVT::i1, PartEltCnt*2);
- LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2));
- HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2));
+ if (N->getOpcode() == ISD::SETCC) {
+ LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2));
+ HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2));
+ } else {
+ assert(N->getOpcode() == ISD::VP_SETCC && "Expected VP_SETCC opcode");
+ SDValue MaskLo, MaskHi, EVLLo, EVLHi;
+ std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(3));
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(N->getOperand(4), N->getValueType(0), DL);
+ LoRes = DAG.getNode(ISD::VP_SETCC, DL, PartResVT, Lo0, Lo1,
+ N->getOperand(2), MaskLo, EVLLo);
+ HiRes = DAG.getNode(ISD::VP_SETCC, DL, PartResVT, Hi0, Hi1,
+ N->getOperand(2), MaskHi, EVLHi);
+ }
SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes);
EVT OpVT = N->getOperand(0).getValueType();
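Editorial sketch: the VP paths above split the mask elementwise and the explicit vector length arithmetically via DAG.SplitEVL. A standalone model of the assumed EVL arithmetic (low half gets umin(EVL, LoElts), high half the saturated remainder; this is an assumption about SplitEVL, not quoted from it):

    #include <algorithm>
    #include <cassert>

    int main() {
      const unsigned NumElts = 8, LoElts = NumElts / 2;
      for (unsigned EVL = 0; EVL <= NumElts; ++EVL) {
        unsigned EVLLo = std::min(EVL, LoElts); // umin
        unsigned EVLHi = EVL - EVLLo;           // usubsat; no underflow here
        assert(EVLLo + EVLHi == EVL && EVLLo <= LoElts && EVLHi <= LoElts);
      }
      return 0;
    }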
@@ -3160,6 +3575,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
Lo.getValue(1), Hi.getValue(1));
ReplaceValueWith(SDValue(N, 1), NewChain);
+ } else if (N->getOpcode() == ISD::VP_FP_ROUND) {
+ SDValue MaskLo, MaskHi, EVLLo, EVLHi;
+ std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1));
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(N->getOperand(2), N->getValueType(0), DL);
+ Lo = DAG.getNode(ISD::VP_FP_ROUND, DL, OutVT, Lo, MaskLo, EVLLo);
+ Hi = DAG.getNode(ISD::VP_FP_ROUND, DL, OutVT, Hi, MaskHi, EVLHi);
} else {
Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
@@ -3204,6 +3626,22 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
return;
SDValue Res = SDValue();
+
+ auto unrollExpandedOp = [&]() {
+ // We're going to widen this vector op to a legal type by padding with undef
+ // elements. If the wide vector op is eventually going to be expanded to
+ // scalar libcalls, then unroll into scalar ops now to avoid unnecessary
+ // libcalls on the undef elements.
+ EVT VT = N->getValueType(0);
+ EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ if (!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) &&
+ TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) {
+ Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements());
+ return true;
+ }
+ return false;
+ };
+
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
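Editorial sketch: the unrollExpandedOp helper above exists because widening pads with undef lanes, and a widened op that later expands to scalar libcalls would pay for those pads. A toy model of the lane-count argument, under the assumption that each scalar lane of an expanded FP op becomes one libcall:

    #include <cassert>

    // Widen-then-expand: pads to the legal width, so undef lanes also pay.
    static unsigned libcallsIfWidened(unsigned WideElts) { return WideElts; }

    // Unroll-then-widen: only the original lanes are scalarized.
    static unsigned libcallsIfUnrolled(unsigned OrigElts) { return OrigElts; }

    int main() {
      // A v3f32 FSIN widened to v4f32 on a target that expands FSIN.
      assert(libcallsIfWidened(4) == 4);  // one call lands on an undef lane
      assert(libcallsIfUnrolled(3) == 3); // what DAG.UnrollVectorOp achieves
      return 0;
    }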
@@ -3223,6 +3661,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
+ case ISD::STEP_VECTOR:
case ISD::SPLAT_VECTOR:
case ISD::SCALAR_TO_VECTOR:
Res = WidenVecRes_ScalarOp(N);
@@ -3235,6 +3674,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_Select(N);
break;
case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break;
+ case ISD::VP_SETCC:
case ISD::SETCC: Res = WidenVecRes_SETCC(N); break;
case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break;
case ISD::VECTOR_SHUFFLE:
@@ -3280,6 +3720,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::USHLSAT:
case ISD::ROTL:
case ISD::ROTR:
+ case ISD::AVGFLOORS:
+ case ISD::AVGFLOORU:
+ case ISD::AVGCEILS:
+ case ISD::AVGCEILU:
// Vector-predicated binary op widening. Note that -- unlike the
// unpredicated versions -- we don't have to worry about trapping on
// operations like UDIV, FADD, etc., as we pass on the original vector
@@ -3297,12 +3741,19 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_Binary(N);
break;
+ case ISD::FPOW:
+ case ISD::FREM:
+ if (unrollExpandedOp())
+ break;
+ // If the target has custom/legal support for the scalar FP intrinsic ops
+ // (they are probably not destined to become libcalls), then widen those
+ // like any other binary ops.
+ LLVM_FALLTHROUGH;
+
case ISD::FADD:
case ISD::FMUL:
- case ISD::FPOW:
case ISD::FSUB:
case ISD::FDIV:
- case ISD::FREM:
case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
@@ -3338,6 +3789,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_FCOPYSIGN(N);
break;
+ case ISD::IS_FPCLASS:
+ Res = WidenVecRes_IS_FPCLASS(N);
+ break;
+
case ISD::FPOWI:
Res = WidenVecRes_POWI(N);
break;
@@ -3350,14 +3805,23 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::ANY_EXTEND:
case ISD::FP_EXTEND:
+ case ISD::VP_FP_EXTEND:
case ISD::FP_ROUND:
+ case ISD::VP_FP_ROUND:
case ISD::FP_TO_SINT:
+ case ISD::VP_FPTOSI:
case ISD::FP_TO_UINT:
+ case ISD::VP_FPTOUI:
case ISD::SIGN_EXTEND:
+ case ISD::VP_SIGN_EXTEND:
case ISD::SINT_TO_FP:
+ case ISD::VP_SITOFP:
+ case ISD::VP_TRUNCATE:
case ISD::TRUNCATE:
case ISD::UINT_TO_FP:
+ case ISD::VP_UITOFP:
case ISD::ZERO_EXTEND:
+ case ISD::VP_ZERO_EXTEND:
Res = WidenVecRes_Convert(N);
break;
@@ -3381,23 +3845,13 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FROUNDEVEN:
case ISD::FSIN:
case ISD::FSQRT:
- case ISD::FTRUNC: {
- // We're going to widen this vector op to a legal type by padding with undef
- // elements. If the wide vector op is eventually going to be expanded to
- // scalar libcalls, then unroll into scalar ops now to avoid unnecessary
- // libcalls on the undef elements.
- EVT VT = N->getValueType(0);
- EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
- if (!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) &&
- TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) {
- Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements());
+ case ISD::FTRUNC:
+ if (unrollExpandedOp())
break;
- }
- }
- // If the target has custom/legal support for the scalar FP intrinsic ops
- // (they are probably not destined to become libcalls), then widen those like
- // any other unary ops.
- LLVM_FALLTHROUGH;
+ // If the target has custom/legal support for the scalar FP intrinsic ops
+ // (they are probably not destined to become libcalls), then widen those
+ // like any other unary ops.
+ LLVM_FALLTHROUGH;
case ISD::ABS:
case ISD::BITREVERSE:
@@ -3407,13 +3861,13 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::CTPOP:
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
- case ISD::FNEG:
+ case ISD::FNEG: case ISD::VP_FNEG:
case ISD::FREEZE:
case ISD::ARITH_FENCE:
case ISD::FCANONICALIZE:
Res = WidenVecRes_Unary(N);
break;
- case ISD::FMA:
+ case ISD::FMA: case ISD::VP_FMA:
case ISD::FSHL:
case ISD::FSHR:
Res = WidenVecRes_Ternary(N);
@@ -3432,7 +3886,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) {
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
SDValue InOp3 = GetWidenedVector(N->getOperand(2));
- return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3);
+ if (N->getNumOperands() == 3)
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3);
+
+ assert(N->getNumOperands() == 5 && "Unexpected number of operands!");
+ assert(N->isVPOpcode() && "Expected VP opcode");
+
+ SDValue Mask =
+ GetWidenedMask(N->getOperand(3), WidenVT.getVectorElementCount());
+ return DAG.getNode(N->getOpcode(), dl, WidenVT,
+ {InOp1, InOp2, InOp3, Mask, N->getOperand(4)});
}
SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
@@ -3552,7 +4015,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
EVT WidenEltVT = WidenVT.getVectorElementType();
EVT VT = WidenVT;
- unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumElts = VT.getVectorMinNumElements();
const SDNodeFlags Flags = N->getFlags();
while (!TLI.isTypeLegal(VT) && NumElts != 1) {
NumElts = NumElts / 2;
@@ -3566,6 +4029,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Flags);
}
+ // FIXME: Improve support for scalable vectors.
+ assert(!VT.isScalableVector() && "Scalable vectors not handled yet.");
+
// No legal vector version so unroll the vector operation and then widen.
if (NumElts == 1)
return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
@@ -3826,6 +4292,12 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
if (InVTEC == WidenEC) {
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InOp);
+ if (N->getNumOperands() == 3) {
+ assert(N->isVPOpcode() && "Expected VP opcode");
+ SDValue Mask =
+ GetWidenedMask(N->getOperand(1), WidenVT.getVectorElementCount());
+ return DAG.getNode(Opcode, DL, WidenVT, InOp, Mask, N->getOperand(2));
+ }
return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags);
}
if (WidenVT.getSizeInBits() == InVT.getSizeInBits()) {
@@ -4007,6 +4479,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) {
return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
}
+SDValue DAGTypeLegalizer::WidenVecRes_IS_FPCLASS(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Arg = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, {Arg, N->getOperand(1)},
+ N->getFlags());
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp = GetWidenedVector(N->getOperand(0));
@@ -4018,7 +4497,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
// Unary op widening.
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp = GetWidenedVector(N->getOperand(0));
- return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp);
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp);
+
+ assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
+ assert(N->isVPOpcode() && "Expected VP opcode");
+
+ SDValue Mask =
+ GetWidenedMask(N->getOperand(1), WidenVT.getVectorElementCount());
+ return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT,
+ {InOp, Mask, N->getOperand(2)});
}
SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
@@ -4243,11 +4731,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_SUBVECTOR(SDNode *N) {
}
SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
- EVT VT = N->getValueType(0);
- EVT EltVT = VT.getVectorElementType();
- EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
- SDValue InOp = N->getOperand(0);
- SDValue Idx = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue InOp = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
SDLoc dl(N);
auto InOpTypeAction = getTypeAction(InOp.getValueType());
@@ -4264,6 +4752,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
// Check if we can extract from the vector.
unsigned WidenNumElts = WidenVT.getVectorMinNumElements();
unsigned InNumElts = InVT.getVectorMinNumElements();
+ unsigned VTNumElts = VT.getVectorMinNumElements();
+ assert(IdxVal % VTNumElts == 0 &&
+ "Expected Idx to be a multiple of subvector minimum vector length");
if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
@@ -4277,8 +4768,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
// nxv2i64 extract_subvector(nxv16i64, 8)
// nxv2i64 extract_subvector(nxv16i64, 10)
// undef)
- unsigned VTNElts = VT.getVectorMinNumElements();
- unsigned GCD = greatestCommonDivisor(VTNElts, WidenNumElts);
+ unsigned GCD = greatestCommonDivisor(VTNumElts, WidenNumElts);
assert((IdxVal % GCD) == 0 && "Expected Idx to be a multiple of the broken "
"down type's element count");
EVT PartVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
@@ -4287,7 +4777,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
if (getTypeAction(PartVT) != TargetLowering::TypeWidenVector) {
SmallVector<SDValue> Parts;
unsigned I = 0;
- for (; I < VTNElts / GCD; ++I)
+ for (; I < VTNumElts / GCD; ++I)
Parts.push_back(
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, PartVT, InOp,
DAG.getVectorIdxConstant(IdxVal + I * GCD, dl)));
@@ -4304,9 +4794,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
// We could try widening the input to the right length but for now, extract
// the original elements, fill the rest with undefs and build a vector.
SmallVector<SDValue, 16> Ops(WidenNumElts);
- unsigned NumElts = VT.getVectorNumElements();
unsigned i;
- for (i = 0; i < NumElts; ++i)
+ for (i = 0; i < VTNumElts; ++i)
Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
DAG.getVectorIdxConstant(IdxVal + i, dl));
@@ -4783,10 +5272,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_Select(SDNode *N) {
SDValue InOp1 = GetWidenedVector(N->getOperand(1));
SDValue InOp2 = GetWidenedVector(N->getOperand(2));
assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
- return Opcode == ISD::VP_SELECT || Opcode == ISD::VP_MERGE
- ? DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2,
- N->getOperand(3))
- : DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2);
+ if (Opcode == ISD::VP_SELECT || Opcode == ISD::VP_MERGE)
+ return DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2,
+ N->getOperand(3));
+ return DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2);
}
SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
@@ -4832,13 +5321,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
N->getOperand(0).getValueType().isVector() &&
"Operands must be vectors");
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ ElementCount WidenEC = WidenVT.getVectorElementCount();
SDValue InOp1 = N->getOperand(0);
EVT InVT = InOp1.getValueType();
assert(InVT.isVector() && "can not widen non-vector type");
- EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(),
- InVT.getVectorElementType(), WidenNumElts);
+ EVT WidenInVT =
+ EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), WidenEC);
// The input and output types often differ here, and it could be that while
// we'd prefer to widen the result type, the input operands have been split.
@@ -4865,8 +5354,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
InOp2.getValueType() == WidenInVT &&
"Input not widened to expected type!");
(void)WidenInVT;
- return DAG.getNode(ISD::SETCC, SDLoc(N),
- WidenVT, InOp1, InOp2, N->getOperand(2));
+ if (N->getOpcode() == ISD::VP_SETCC) {
+ SDValue Mask =
+ GetWidenedMask(N->getOperand(3), WidenVT.getVectorElementCount());
+ return DAG.getNode(ISD::VP_SETCC, SDLoc(N), WidenVT, InOp1, InOp2,
+ N->getOperand(2), Mask, N->getOperand(4));
+ }
+ return DAG.getNode(ISD::SETCC, SDLoc(N), WidenVT, InOp1, InOp2,
+ N->getOperand(2));
}
SDValue DAGTypeLegalizer::WidenVecRes_STRICT_FSETCC(SDNode *N) {
@@ -4946,6 +5441,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::STRICT_FSETCCS: Res = WidenVecOp_STRICT_FSETCC(N); break;
case ISD::VSELECT: Res = WidenVecOp_VSELECT(N); break;
case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break;
+ case ISD::IS_FPCLASS: Res = WidenVecOp_IS_FPCLASS(N); break;
case ISD::ANY_EXTEND:
case ISD::SIGN_EXTEND:
@@ -5098,6 +5594,34 @@ SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) {
return DAG.UnrollVectorOp(N);
}
+SDValue DAGTypeLegalizer::WidenVecOp_IS_FPCLASS(SDNode *N) {
+ SDLoc DL(N);
+ EVT ResultVT = N->getValueType(0);
+ SDValue Test = N->getOperand(1);
+ SDValue WideArg = GetWidenedVector(N->getOperand(0));
+
+ // Process this node similarly to SETCC.
+ EVT WideResultVT = getSetCCResultType(WideArg.getValueType());
+ if (ResultVT.getScalarType() == MVT::i1)
+ WideResultVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ WideResultVT.getVectorNumElements());
+
+ SDValue WideNode = DAG.getNode(ISD::IS_FPCLASS, DL, WideResultVT,
+ {WideArg, Test}, N->getFlags());
+
+ // Extract the needed results from the result vector.
+ EVT ResVT =
+ EVT::getVectorVT(*DAG.getContext(), WideResultVT.getVectorElementType(),
+ ResultVT.getVectorNumElements());
+ SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, WideNode,
+ DAG.getVectorIdxConstant(0, DL));
+
+ EVT OpVT = N->getOperand(0).getValueType();
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
+ return DAG.getNode(ExtendCode, DL, ResultVT, CC);
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
// Since the result is legal and the input is illegal.
EVT VT = N->getValueType(0);
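Editorial sketch: WidenVecOp_IS_FPCLASS above mirrors the SETCC recipe, testing the widened vector, extracting the original lane count, then extending per the target's boolean contents. A standalone model of the test-then-extract step, using an fcNan-style test for concreteness (the pad lane is computed and then discarded):

    #include <cassert>
    #include <cmath>
    #include <vector>

    int main() {
      // Original v3f32 widened to v4f32; the pad lane's result is discarded.
      std::vector<float> Wide = {1.0f, std::nanf(""), 2.0f, /*pad*/ 0.0f};
      std::vector<bool> WideIsNan;
      for (float F : Wide)
        WideIsNan.push_back(std::isnan(F)); // wide IS_FPCLASS(..., fcNan)
      // EXTRACT_SUBVECTOR at index 0: keep only the original three lanes.
      std::vector<bool> Res(WideIsNan.begin(), WideIsNan.begin() + 3);
      assert(!Res[0] && Res[1] && !Res[2]);
      return 0;
    }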
@@ -5192,11 +5716,12 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
SDLoc dl(N);
// Check if we can convert between two legal vector types and extract.
- unsigned InWidenSize = InWidenVT.getSizeInBits();
- unsigned Size = VT.getSizeInBits();
+ TypeSize InWidenSize = InWidenVT.getSizeInBits();
+ TypeSize Size = VT.getSizeInBits();
// x86mmx is not an acceptable vector element type, so don't try.
- if (InWidenSize % Size == 0 && !VT.isVector() && VT != MVT::x86mmx) {
- unsigned NewNumElts = InWidenSize / Size;
+ if (!VT.isVector() && VT != MVT::x86mmx &&
+ InWidenSize.hasKnownScalarFactor(Size)) {
+ unsigned NewNumElts = InWidenSize.getKnownScalarFactor(Size);
EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts);
if (TLI.isTypeLegal(NewVT)) {
SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
@@ -5211,9 +5736,11 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
// having to copy via memory.
if (VT.isVector()) {
EVT EltVT = VT.getVectorElementType();
- unsigned EltSize = EltVT.getSizeInBits();
- if (InWidenSize % EltSize == 0) {
- unsigned NewNumElts = InWidenSize / EltSize;
+ unsigned EltSize = EltVT.getFixedSizeInBits();
+ if (InWidenSize.isKnownMultipleOf(EltSize)) {
+ ElementCount NewNumElts =
+ (InWidenVT.getVectorElementCount() * InWidenVT.getScalarSizeInBits())
+ .divideCoefficientBy(EltSize);
EVT NewVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NewNumElts);
if (TLI.isTypeLegal(NewVT)) {
SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
@@ -5266,18 +5793,17 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
}
SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) {
+ EVT VT = N->getValueType(0);
SDValue SubVec = N->getOperand(1);
SDValue InVec = N->getOperand(0);
- if (getTypeAction(InVec.getValueType()) == TargetLowering::TypeWidenVector)
- InVec = GetWidenedVector(InVec);
-
if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector)
SubVec = GetWidenedVector(SubVec);
- if (SubVec.getValueType() == InVec.getValueType() && InVec.isUndef() &&
+ if (SubVec.getValueType().knownBitsLE(VT) && InVec.isUndef() &&
N->getConstantOperandVal(2) == 0)
- return SubVec;
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, InVec, SubVec,
+ N->getOperand(2));
report_fatal_error("Don't know how to widen the operands for "
"INSERT_SUBVECTOR");
@@ -5500,11 +6026,11 @@ SDValue DAGTypeLegalizer::WidenVecOp_VP_SCATTER(SDNode *N, unsigned OpNo) {
Mask = GetWidenedMask(Mask, WideEC);
WideMemVT = EVT::getVectorVT(*DAG.getContext(),
VPSC->getMemoryVT().getScalarType(), WideEC);
- } else if (OpNo == 4) {
+ } else if (OpNo == 3) {
// Just widen the index. It's allowed to have extra elements.
Index = GetWidenedVector(Index);
} else
- llvm_unreachable("Can't widen this operand of mscatter");
+ llvm_unreachable("Can't widen this operand of VP_SCATTER");
SDValue Ops[] = {
VPSC->getChain(), DataOp, VPSC->getBasePtr(), Index, Scale, Mask,
@@ -5597,8 +6123,20 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
assert(NeutralElem && "Neutral element must exist");
// Pad the vector with the neutral element.
- unsigned OrigElts = OrigVT.getVectorNumElements();
- unsigned WideElts = WideVT.getVectorNumElements();
+ unsigned OrigElts = OrigVT.getVectorMinNumElements();
+ unsigned WideElts = WideVT.getVectorMinNumElements();
+
+ if (WideVT.isScalableVector()) {
+ unsigned GCD = greatestCommonDivisor(OrigElts, WideElts);
+ EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), ElemVT,
+ ElementCount::getScalable(GCD));
+ SDValue SplatNeutral = DAG.getSplatVector(SplatVT, dl, NeutralElem);
+ for (unsigned Idx = OrigElts; Idx < WideElts; Idx = Idx + GCD)
+ Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Op, SplatNeutral,
+ DAG.getVectorIdxConstant(Idx, dl));
+ return DAG.getNode(Opc, dl, N->getValueType(0), Op, Flags);
+ }
+
for (unsigned Idx = OrigElts; Idx < WideElts; Idx++)
Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem,
DAG.getVectorIdxConstant(Idx, dl));
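Editorial sketch: for scalable types the code above cannot insert scalar elements one at a time, so it fills the tail with GCD-sized splat subvectors of the reduction's neutral element. A fixed-size standalone model of that padding for an ADD reduction (the values are made up):

    #include <cassert>
    #include <numeric>
    #include <vector>

    int main() {
      const unsigned OrigElts = 6, WideElts = 8;
      const unsigned GCD = std::gcd(OrigElts, WideElts); // chunk size, here 2
      std::vector<int> Op = {1, 2, 3, 4, 5, 6, 99, 99}; // widened, junk in pad
      const int NeutralElem = 0; // neutral element for an ADD reduction
      for (unsigned Idx = OrigElts; Idx < WideElts; Idx += GCD)
        for (unsigned I = 0; I < GCD; ++I) // INSERT_SUBVECTOR of a splat
          Op[Idx + I] = NeutralElem;
      int Sum = std::accumulate(Op.begin(), Op.end(), 0); // VECREDUCE_ADD
      assert(Sum == 21);
      return 0;
    }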
@@ -5622,8 +6160,20 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE_SEQ(SDNode *N) {
SDValue NeutralElem = DAG.getNeutralElement(BaseOpc, dl, ElemVT, Flags);
// Pad the vector with the neutral element.
- unsigned OrigElts = OrigVT.getVectorNumElements();
- unsigned WideElts = WideVT.getVectorNumElements();
+ unsigned OrigElts = OrigVT.getVectorMinNumElements();
+ unsigned WideElts = WideVT.getVectorMinNumElements();
+
+ if (WideVT.isScalableVector()) {
+ unsigned GCD = greatestCommonDivisor(OrigElts, WideElts);
+ EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), ElemVT,
+ ElementCount::getScalable(GCD));
+ SDValue SplatNeutral = DAG.getSplatVector(SplatVT, dl, NeutralElem);
+ for (unsigned Idx = OrigElts; Idx < WideElts; Idx = Idx + GCD)
+ Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Op, SplatNeutral,
+ DAG.getVectorIdxConstant(Idx, dl));
+ return DAG.getNode(Opc, dl, N->getValueType(0), AccOp, Op, Flags);
+ }
+
for (unsigned Idx = OrigElts; Idx < WideElts; Idx++)
Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem,
DAG.getVectorIdxConstant(Idx, dl));
@@ -5795,7 +6345,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
// Allow wider loads if they are sufficiently aligned to avoid memory faults
// and if the original load is simple.
unsigned LdAlign =
- (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlignment();
+ (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlign().value();
// Find the vector type that can load from.
Optional<EVT> FirstVT =
@@ -6103,7 +6653,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
EVT InVT = InOp.getValueType();
assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
"input and widen element type must match");
- assert(!InVT.isScalableVector() && !NVT.isScalableVector() &&
+ assert(InVT.isScalableVector() == NVT.isScalableVector() &&
"cannot modify scalable vectors in this way");
SDLoc dl(InOp);
@@ -6111,10 +6661,10 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
if (InVT == NVT)
return InOp;
- unsigned InNumElts = InVT.getVectorNumElements();
- unsigned WidenNumElts = NVT.getVectorNumElements();
- if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) {
- unsigned NumConcat = WidenNumElts / InNumElts;
+ ElementCount InEC = InVT.getVectorElementCount();
+ ElementCount WidenEC = NVT.getVectorElementCount();
+ if (WidenEC.hasKnownScalarFactor(InEC)) {
+ unsigned NumConcat = WidenEC.getKnownScalarFactor(InEC);
SmallVector<SDValue, 16> Ops(NumConcat);
SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, InVT) :
DAG.getUNDEF(InVT);
@@ -6125,10 +6675,16 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Ops);
}
- if (WidenNumElts < InNumElts && InNumElts % WidenNumElts)
+ if (InEC.hasKnownScalarFactor(WidenEC))
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp,
DAG.getVectorIdxConstant(0, dl));
+ assert(!InVT.isScalableVector() && !NVT.isScalableVector() &&
+ "Scalable vectors should have been handled already.");
+
+ unsigned InNumElts = InEC.getFixedValue();
+ unsigned WidenNumElts = WidenEC.getFixedValue();
+
// Fall back to extract and build.
SmallVector<SDValue, 16> Ops(WidenNumElts);
EVT EltVT = NVT.getVectorElementType();
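Editorial sketch: ModifyToType above now reasons in element counts: concatenate (with zero or undef fill) when the target count is a known multiple of the source count, and extract at index 0 when it is a known divisor. A standalone fixed-width model of those two paths:

    #include <cassert>
    #include <vector>

    static std::vector<int> modifyToType(const std::vector<int> &In,
                                         size_t NElts, int FillVal) {
      if (NElts % In.size() == 0) { // CONCAT_VECTORS path
        std::vector<int> Out(In);
        Out.resize(NElts, FillVal);
        return Out;
      }
      if (In.size() % NElts == 0) // EXTRACT_SUBVECTOR path, index 0
        return std::vector<int>(In.begin(), In.begin() + NElts);
      return {}; // fixed-width extract-and-build fallback, omitted here
    }

    int main() {
      assert((modifyToType({1, 2}, 4, 0) == std::vector<int>{1, 2, 0, 0}));
      assert((modifyToType({1, 2, 3, 4}, 2, 0) == std::vector<int>{1, 2}));
      return 0;
    }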
@@ -6144,29 +6700,3 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
Ops[Idx] = FillVal;
return DAG.getBuildVector(NVT, dl, Ops);
}
-
-void DAGTypeLegalizer::SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
- SDValue InLo, InHi;
- GetSplitVector(N->getOperand(0), InLo, InHi);
- SDLoc DL(N);
-
- Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, InHi.getValueType(), InHi);
- Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, InLo.getValueType(), InLo);
-}
-
-void DAGTypeLegalizer::SplitVecRes_VECTOR_SPLICE(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
- EVT VT = N->getValueType(0);
- SDLoc DL(N);
-
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
-
- SDValue Expanded = TLI.expandVectorSplice(N, DAG);
- Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, Expanded,
- DAG.getVectorIdxConstant(0, DL));
- Hi =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, Expanded,
- DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
-}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index 3d5c4c5b1cae..e0e8d503ca92 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -20,7 +20,6 @@
#include "llvm/CodeGen/ResourcePriorityQueue.h"
#include "llvm/CodeGen/DFAPacketizer.h"
-#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -28,21 +27,18 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
using namespace llvm;
#define DEBUG_TYPE "scheduler"
-static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden,
- cl::ZeroOrMore, cl::init(false),
- cl::desc("Disable use of DFA during scheduling"));
+static cl::opt<bool>
+ DisableDFASched("disable-dfa-sched", cl::Hidden,
+ cl::desc("Disable use of DFA during scheduling"));
static cl::opt<int> RegPressureThreshold(
- "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5),
- cl::desc("Track reg pressure and switch priority to in-depth"));
+ "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::init(5),
+ cl::desc("Track reg pressure and switch priority to in-depth"));
ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
: Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index f64b332a7fef..9fcf692babdc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -14,6 +14,7 @@
#define LLVM_LIB_CODEGEN_SELECTIONDAG_SDNODEDBGVALUE_H
#include "llvm/IR/DebugLoc.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/DataTypes.h"
#include <utility>
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 1a6be0cc2091..78fc407e9573 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -11,16 +11,14 @@
//===----------------------------------------------------------------------===//
#include "InstrEmitter.h"
-#include "ScheduleDAGSDNodes.h"
#include "SDNodeDbgValue.h"
-#include "llvm/ADT/STLExtras.h"
+#include "ScheduleDAGSDNodes.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -442,17 +440,29 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
/// CheckForLiveRegDef - Return true and update live register vector if the
/// specified register def of the specified SUnit clobbers any "live" registers.
static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
- std::vector<SUnit*> &LiveRegDefs,
+ std::vector<SUnit *> &LiveRegDefs,
SmallSet<unsigned, 4> &RegAdded,
SmallVectorImpl<unsigned> &LRegs,
- const TargetRegisterInfo *TRI) {
+ const TargetRegisterInfo *TRI,
+ const SDNode *Node = nullptr) {
bool Added = false;
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
- if (LiveRegDefs[*AI] && LiveRegDefs[*AI] != SU) {
- if (RegAdded.insert(*AI).second) {
- LRegs.push_back(*AI);
- Added = true;
- }
+ // Check if Reg is live.
+ if (!LiveRegDefs[*AI])
+ continue;
+
+ // Allow multiple uses of the same def.
+ if (LiveRegDefs[*AI] == SU)
+ continue;
+
+ // Also allow multiple uses of the same def when they come from the same node.
+ if (Node && LiveRegDefs[*AI]->getNode() == Node)
+ continue;
+
+ // Add Reg to the set of interfering live regs.
+ if (RegAdded.insert(*AI).second) {
+ LRegs.push_back(*AI);
+ Added = true;
}
}
return Added;
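Editorial sketch: the new Node parameter above lets a CopyToReg whose source is the very node that currently defines the physical register avoid reporting interference. A minimal standalone model of that decision (a plain struct stands in for SDNode):

    #include <cassert>

    struct Node { int Id; };

    // A live physreg def only interferes when it comes from a different node
    // than the one feeding the CopyToReg being scheduled.
    static bool interferes(const Node *LiveDef, const Node *CopySrc) {
      if (!LiveDef)
        return false; // register not live
      if (LiveDef == CopySrc)
        return false; // same def reaching through CopyToReg: allowed
      return true;    // genuine clobber of a live register
    }

    int main() {
      Node A{0}, B{1};
      assert(!interferes(nullptr, &A));
      assert(!interferes(&A, &A)); // the new Node check above
      assert(interferes(&A, &B));
      return 0;
    }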
@@ -504,6 +514,15 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
}
continue;
}
+
+ if (Node->getOpcode() == ISD::CopyToReg) {
+ Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ if (Reg.isPhysical()) {
+ SDNode *SrcNode = Node->getOperand(2).getNode();
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI, SrcNode);
+ }
+ }
+
if (!Node->isMachineOpcode())
continue;
const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 7a5e8ac6075e..8a04ce7535a1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -1294,11 +1294,11 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
/// CheckForLiveRegDef - Return true and update live register vector if the
/// specified register def of the specified SUnit clobbers any "live" registers.
-static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
- SUnit **LiveRegDefs,
+static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, SUnit **LiveRegDefs,
SmallSet<unsigned, 4> &RegAdded,
SmallVectorImpl<unsigned> &LRegs,
- const TargetRegisterInfo *TRI) {
+ const TargetRegisterInfo *TRI,
+ const SDNode *Node = nullptr) {
for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
// Check if Reg is live.
@@ -1307,6 +1307,10 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
// Allow multiple uses of the same def.
if (LiveRegDefs[*AliasI] == SU) continue;
+ // Also allow multiple uses of the same def when they come from the same node.
+ if (Node && LiveRegDefs[*AliasI]->getNode() == Node)
+ continue;
+
// Add Reg to the set of interfering live regs.
if (RegAdded.insert(*AliasI).second) {
LRegs.push_back(*AliasI);
@@ -1387,6 +1391,15 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
continue;
}
+ if (Node->getOpcode() == ISD::CopyToReg) {
+ Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+ if (Reg.isPhysical()) {
+ SDNode *SrcNode = Node->getOperand(2).getNode();
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI,
+ SrcNode);
+ }
+ }
+
if (!Node->isMachineOpcode())
continue;
// If we're in the middle of scheduling a call, don't begin scheduling
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 92897aca7f6b..2a10157b404e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -884,7 +884,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
if (MI->isCandidateForCallSiteEntry() &&
DAG->getTarget().Options.EmitCallSiteInfo)
- MF.addCallArgsForwardingRegs(MI, DAG->getSDCallSiteInfo(Node));
+ MF.addCallArgsForwardingRegs(MI, DAG->getCallSiteInfo(Node));
if (DAG->getNoMergeSiteInfo(Node)) {
MI->setFlag(MachineInstr::MIFlag::NoMerge);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 10940478010e..1ba1fd65b8c9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -19,19 +19,15 @@
#include "ScheduleDAGSDNodes.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/ResourcePriorityQueue.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include <climits>
using namespace llvm;
#define DEBUG_TYPE "pre-RA-sched"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 40d861702e86..b3b8756ae9ba 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -24,9 +24,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -55,7 +53,6 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
-#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compiler.h"
@@ -144,11 +141,11 @@ bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) {
unsigned EltSize =
N->getValueType(0).getVectorElementType().getSizeInBits();
if (auto *Op0 = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
- SplatVal = Op0->getAPIntValue().truncOrSelf(EltSize);
+ SplatVal = Op0->getAPIntValue().trunc(EltSize);
return true;
}
if (auto *Op0 = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) {
- SplatVal = Op0->getValueAPF().bitcastToAPInt().truncOrSelf(EltSize);
+ SplatVal = Op0->getValueAPF().bitcastToAPInt().trunc(EltSize);
return true;
}
}
@@ -714,6 +711,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(LD->getMemoryVT().getRawBits());
ID.AddInteger(LD->getRawSubclassData());
ID.AddInteger(LD->getPointerInfo().getAddrSpace());
+ ID.AddInteger(LD->getMemOperand()->getFlags());
break;
}
case ISD::STORE: {
@@ -721,6 +719,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(ST->getMemoryVT().getRawBits());
ID.AddInteger(ST->getRawSubclassData());
ID.AddInteger(ST->getPointerInfo().getAddrSpace());
+ ID.AddInteger(ST->getMemOperand()->getFlags());
break;
}
case ISD::VP_LOAD: {
@@ -728,6 +727,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(ELD->getMemoryVT().getRawBits());
ID.AddInteger(ELD->getRawSubclassData());
ID.AddInteger(ELD->getPointerInfo().getAddrSpace());
+ ID.AddInteger(ELD->getMemOperand()->getFlags());
break;
}
case ISD::VP_STORE: {
@@ -735,6 +735,21 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(EST->getMemoryVT().getRawBits());
ID.AddInteger(EST->getRawSubclassData());
ID.AddInteger(EST->getPointerInfo().getAddrSpace());
+ ID.AddInteger(EST->getMemOperand()->getFlags());
+ break;
+ }
+ case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: {
+ const VPStridedLoadSDNode *SLD = cast<VPStridedLoadSDNode>(N);
+ ID.AddInteger(SLD->getMemoryVT().getRawBits());
+ ID.AddInteger(SLD->getRawSubclassData());
+ ID.AddInteger(SLD->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::EXPERIMENTAL_VP_STRIDED_STORE: {
+ const VPStridedStoreSDNode *SST = cast<VPStridedStoreSDNode>(N);
+ ID.AddInteger(SST->getMemoryVT().getRawBits());
+ ID.AddInteger(SST->getRawSubclassData());
+ ID.AddInteger(SST->getPointerInfo().getAddrSpace());
break;
}
case ISD::VP_GATHER: {
@@ -742,6 +757,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(EG->getMemoryVT().getRawBits());
ID.AddInteger(EG->getRawSubclassData());
ID.AddInteger(EG->getPointerInfo().getAddrSpace());
+ ID.AddInteger(EG->getMemOperand()->getFlags());
break;
}
case ISD::VP_SCATTER: {
@@ -749,6 +765,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(ES->getMemoryVT().getRawBits());
ID.AddInteger(ES->getRawSubclassData());
ID.AddInteger(ES->getPointerInfo().getAddrSpace());
+ ID.AddInteger(ES->getMemOperand()->getFlags());
break;
}
case ISD::MLOAD: {
@@ -756,6 +773,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(MLD->getMemoryVT().getRawBits());
ID.AddInteger(MLD->getRawSubclassData());
ID.AddInteger(MLD->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MLD->getMemOperand()->getFlags());
break;
}
case ISD::MSTORE: {
@@ -763,6 +781,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(MST->getMemoryVT().getRawBits());
ID.AddInteger(MST->getRawSubclassData());
ID.AddInteger(MST->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MST->getMemOperand()->getFlags());
break;
}
case ISD::MGATHER: {
@@ -770,6 +789,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(MG->getMemoryVT().getRawBits());
ID.AddInteger(MG->getRawSubclassData());
ID.AddInteger(MG->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MG->getMemOperand()->getFlags());
break;
}
case ISD::MSCATTER: {
@@ -777,6 +797,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(MS->getMemoryVT().getRawBits());
ID.AddInteger(MS->getRawSubclassData());
ID.AddInteger(MS->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MS->getMemOperand()->getFlags());
break;
}
case ISD::ATOMIC_CMP_SWAP:
@@ -799,11 +820,13 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(AT->getMemoryVT().getRawBits());
ID.AddInteger(AT->getRawSubclassData());
ID.AddInteger(AT->getPointerInfo().getAddrSpace());
+ ID.AddInteger(AT->getMemOperand()->getFlags());
break;
}
case ISD::PREFETCH: {
const MemSDNode *PF = cast<MemSDNode>(N);
ID.AddInteger(PF->getPointerInfo().getAddrSpace());
+ ID.AddInteger(PF->getMemOperand()->getFlags());
break;
}
case ISD::VECTOR_SHUFFLE: {
@@ -821,11 +844,18 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(BA->getTargetFlags());
break;
}
+ case ISD::AssertAlign:
+ ID.AddInteger(cast<AssertAlignSDNode>(N)->getAlign().value());
+ break;
} // end switch (N->getOpcode())
- // Target specific memory nodes could also have address spaces to check.
- if (N->isTargetMemoryOpcode())
- ID.AddInteger(cast<MemSDNode>(N)->getPointerInfo().getAddrSpace());
+ // Target specific memory nodes could also have address spaces and flags
+ // to check.
+ if (N->isTargetMemoryOpcode()) {
+ const MemSDNode *MN = cast<MemSDNode>(N);
+ ID.AddInteger(MN->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MN->getMemOperand()->getFlags());
+ }
}
/// AddNodeIDNode - Generic routine for adding a nodes info to the NodeID
@@ -1395,6 +1425,12 @@ SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) {
return getNode(ISD::XOR, DL, VT, Val, TrueValue);
}
+SDValue SelectionDAG::getVPLogicalNOT(const SDLoc &DL, SDValue Val,
+ SDValue Mask, SDValue EVL, EVT VT) {
+ SDValue TrueValue = getBoolConstant(true, DL, VT, VT);
+ return getNode(ISD::VP_XOR, DL, VT, Val, TrueValue, Mask, EVL);
+}
+
SDValue SelectionDAG::getBoolConstant(bool V, const SDLoc &DL, EVT VT,
EVT OpVT) {
if (!V)
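Editorial sketch: getVPLogicalNOT above lowers a predicated NOT as VP_XOR with an all-true splat. A standalone model of the lane semantics, under the usual VP assumption that only lanes with the mask set and index below EVL are active (inactive lanes here simply keep their old value for illustration):

    #include <cassert>
    #include <vector>

    int main() {
      std::vector<bool> Val  = {true, false, true, false};
      std::vector<bool> Mask = {true, true, false, true};
      const unsigned EVL = 3;     // explicit vector length
      std::vector<bool> Res(Val); // inactive lanes kept for illustration
      for (unsigned I = 0; I < Val.size(); ++I)
        if (I < EVL && Mask[I])
          Res[I] = Val[I] ^ true; // VP_XOR with the all-true splat
      assert(Res[0] == false && Res[1] == true && Res[2] == true);
      return 0;
    }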
@@ -2433,23 +2469,9 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits) {
if (VT.isScalableVector())
return SDValue();
- APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnes(VT.getVectorNumElements())
- : APInt(1, 1);
- return GetDemandedBits(V, DemandedBits, DemandedElts);
-}
-
-/// See if the specified operand can be simplified with the knowledge that only
-/// the bits specified by DemandedBits are used in the elements specified by
-/// DemandedElts.
-/// TODO: really we should be making this into the DAG equivalent of
-/// SimplifyMultipleUseDemandedBits and not generate any new nodes.
-SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits,
- const APInt &DemandedElts) {
switch (V.getOpcode()) {
default:
- return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DemandedElts,
- *this);
+ return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, *this);
case ISD::Constant: {
const APInt &CVal = cast<ConstantSDNode>(V)->getAPIntValue();
APInt NewVal = CVal & DemandedBits;
@@ -2469,8 +2491,8 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits,
if (Amt >= DemandedBits.getBitWidth())
break;
APInt SrcDemandedBits = DemandedBits << Amt;
- if (SDValue SimplifyLHS =
- GetDemandedBits(V.getOperand(0), SrcDemandedBits))
+ if (SDValue SimplifyLHS = TLI->SimplifyMultipleUseDemandedBits(
+ V.getOperand(0), SrcDemandedBits, *this))
return getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS,
V.getOperand(1));
}
@@ -2503,6 +2525,14 @@ bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask,
return Mask.isSubsetOf(computeKnownBits(V, DemandedElts, Depth).Zero);
}
+/// MaskedVectorIsZero - Return true if 'V' is known to be zero in
+/// DemandedElts. We use this predicate to simplify operations downstream.
+bool SelectionDAG::MaskedVectorIsZero(SDValue V, const APInt &DemandedElts,
+ unsigned Depth /* = 0 */) const {
+ APInt Mask = APInt::getAllOnes(V.getScalarValueSizeInBits());
+ return Mask.isSubsetOf(computeKnownBits(V, DemandedElts, Depth).Zero);
+}
+
/// MaskedValueIsAllOnes - Return true if '(Op & Mask) == Mask'.
bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask,
unsigned Depth) const {
@@ -2587,9 +2617,9 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
return true;
}
case ISD::VECTOR_SHUFFLE: {
- // Check if this is a shuffle node doing a splat.
- // TODO: Do we need to handle shuffle(splat, undef, mask)?
- int SplatIndex = -1;
+ // Check if this is a shuffle node doing a splat or a shuffle of a splat.
+ APInt DemandedLHS = APInt::getNullValue(NumElts);
+ APInt DemandedRHS = APInt::getNullValue(NumElts);
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(V)->getMask();
for (int i = 0; i != (int)NumElts; ++i) {
int M = Mask[i];
@@ -2599,11 +2629,30 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
}
if (!DemandedElts[i])
continue;
- if (0 <= SplatIndex && SplatIndex != M)
- return false;
- SplatIndex = M;
+ if (M < (int)NumElts)
+ DemandedLHS.setBit(M);
+ else
+ DemandedRHS.setBit(M - NumElts);
}
- return true;
+
+ // If we aren't demanding either op, assume there's no splat.
+ // If we are demanding both ops, assume there's no splat.
+ if ((DemandedLHS.isZero() && DemandedRHS.isZero()) ||
+ (!DemandedLHS.isZero() && !DemandedRHS.isZero()))
+ return false;
+
+ // See if the demanded elts of the source op form a splat, or we only demand
+ // one element, which is always a splat.
+ // TODO: Handle source ops splats with undefs.
+ auto CheckSplatSrc = [&](SDValue Src, const APInt &SrcElts) {
+ APInt SrcUndefs;
+ return (SrcElts.countPopulation() == 1) ||
+ (isSplatValue(Src, SrcElts, SrcUndefs, Depth + 1) &&
+ (SrcElts & SrcUndefs).isZero());
+ };
+ if (!DemandedLHS.isZero())
+ return CheckSplatSrc(V.getOperand(0), DemandedLHS);
+ return CheckSplatSrc(V.getOperand(1), DemandedRHS);
}
case ISD::EXTRACT_SUBVECTOR: {
// Offset the demanded elts by the subvector index.
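Editorial sketch: the rewritten VECTOR_SHUFFLE case above accepts a shuffle of a splat: the result is a splat when the demanded lanes read exactly one source operand and that operand's demanded elements are themselves a splat (or a single element). A simplified standalone model that compares concrete values instead of recursing into isSplatValue:

    #include <cassert>
    #include <set>
    #include <vector>

    static bool isSplatShuffle(const std::vector<int> &LHS,
                               const std::vector<int> &RHS,
                               const std::vector<int> &Mask) {
      std::set<int> FromLHS, FromRHS; // distinct values read per source
      for (int M : Mask) {
        if (M < 0)
          continue; // undef mask element
        if (M < (int)LHS.size())
          FromLHS.insert(LHS[M]);
        else
          FromRHS.insert(RHS[M - LHS.size()]);
      }
      // Demanding both ops, or neither, is conservatively not a splat.
      if (FromLHS.empty() == FromRHS.empty())
        return false;
      return FromLHS.size() <= 1 && FromRHS.size() <= 1;
    }

    int main() {
      // shuffle(splat(7), undef, <1,0,2,1>) is still a splat of 7.
      assert(isSplatShuffle({7, 7, 7, 7}, {0, 0, 0, 0}, {1, 0, 2, 1}));
      assert(!isSplatShuffle({1, 2, 3, 4}, {0, 0, 0, 0}, {0, 1, 0, 1}));
      return 0;
    }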
@@ -2614,7 +2663,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
uint64_t Idx = V.getConstantOperandVal(1);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
APInt UndefSrcElts;
- APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+ APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
if (isSplatValue(Src, DemandedSrcElts, UndefSrcElts, Depth + 1)) {
UndefElts = UndefSrcElts.extractBits(NumElts, Idx);
return true;
@@ -2631,9 +2680,49 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
return false;
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
APInt UndefSrcElts;
- APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
+ APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
if (isSplatValue(Src, DemandedSrcElts, UndefSrcElts, Depth + 1)) {
- UndefElts = UndefSrcElts.truncOrSelf(NumElts);
+ UndefElts = UndefSrcElts.trunc(NumElts);
+ return true;
+ }
+ break;
+ }
+ case ISD::BITCAST: {
+ SDValue Src = V.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
+ unsigned BitWidth = VT.getScalarSizeInBits();
+
+ // Ignore bitcasts from unsupported types.
+ // TODO: Add fp support?
+ if (!SrcVT.isVector() || !SrcVT.isInteger() || !VT.isInteger())
+ break;
+
+ // Bitcast 'small element' vector to 'large element' vector.
+ if ((BitWidth % SrcBitWidth) == 0) {
+ // See if each sub element is a splat.
+ unsigned Scale = BitWidth / SrcBitWidth;
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ APInt ScaledDemandedElts =
+ APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
+ for (unsigned I = 0; I != Scale; ++I) {
+ APInt SubUndefElts;
+ APInt SubDemandedElt = APInt::getOneBitSet(Scale, I);
+ APInt SubDemandedElts = APInt::getSplat(NumSrcElts, SubDemandedElt);
+ SubDemandedElts &= ScaledDemandedElts;
+ if (!isSplatValue(Src, SubDemandedElts, SubUndefElts, Depth + 1))
+ return false;
+
+ // Here we can't merge undef bits with a "MatchAnyBits" operation,
+ // because some operations use only part of the source value.
+ // Take llvm.fshl.* for example:
+ // t1: v4i32 = Constant:i32<12>, undef:i32, Constant:i32<12>, undef:i32
+ // t2: v2i64 = bitcast t1
+ // t5: v2i64 = fshl t3, t4, t2
+ // We cannot convert t2 to {i64 undef, i64 undef}
+ UndefElts |= APIntOps::ScaleBitMask(SubUndefElts, NumElts,
+ /*MatchAllBits=*/true);
+ }
return true;
}
break;
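Editorial sketch: for the small-to-large bitcast case above, each of the Scale sub-element positions must independently be a splat across the source vector. A standalone model with concrete v4i32 to v2i64 lane math:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // A vector of NumDst large elements, each built from Scale small lanes,
    // is a splat iff every sub-position holds one value across all elements.
    static bool isSplatAfterBitcast(const std::vector<uint32_t> &Src,
                                    unsigned Scale) {
      unsigned NumDst = Src.size() / Scale;
      for (unsigned I = 0; I < Scale; ++I)  // one sub-position at a time
        for (unsigned J = 1; J < NumDst; ++J)
          if (Src[J * Scale + I] != Src[I])
            return false;
      return true;
    }

    int main() {
      // {12, 34, 12, 34} as v4i32 bitcasts to a splat v2i64.
      assert(isSplatAfterBitcast({12, 34, 12, 34}, 2));
      assert(!isSplatAfterBitcast({12, 34, 12, 35}, 2));
      return 0;
    }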
@@ -2978,7 +3067,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
uint64_t Idx = Op.getConstantOperandVal(1);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
- APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+ APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
Known = computeKnownBits(Src, DemandedSrcElts, Depth + 1);
break;
}
@@ -3083,9 +3172,18 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
bool SelfMultiply = Op.getOperand(0) == Op.getOperand(1);
// TODO: SelfMultiply can be poison, but not undef.
- SelfMultiply &= isGuaranteedNotToBeUndefOrPoison(
- Op.getOperand(0), DemandedElts, false, Depth + 1);
+ if (SelfMultiply)
+ SelfMultiply &= isGuaranteedNotToBeUndefOrPoison(
+ Op.getOperand(0), DemandedElts, false, Depth + 1);
Known = KnownBits::mul(Known, Known2, SelfMultiply);
+
+ // If the multiplication is known not to overflow, the product of a number
+ // with itself is non-negative. Only do this if we haven't already computed
+ // the opposite value for the sign bit.
+ if (Op->getFlags().hasNoSignedWrap() &&
+ Op.getOperand(0) == Op.getOperand(1) &&
+ !Known.isNegative())
+ Known.makeNonNegative();
break;
}
case ISD::MULHU: {
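Editorial sketch: a quick exhaustive check of the new MUL fact above: with no signed wrap, x * x has a known-zero sign bit. The i8 range below keeps every square in bounds, so the nsw precondition holds:

    #include <cassert>
    #include <cstdint>

    int main() {
      // i8 squares stay in range for |x| <= 11, so x * x is nsw here.
      for (int8_t X = -11; X <= 11; ++X) {
        int16_t Product = int16_t(X) * int16_t(X); // exact signed product
        assert(Product >= 0); // sign bit of the nsw self-product is zero
      }
      return 0;
    }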
@@ -3128,6 +3226,16 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = KnownBits::udiv(Known, Known2);
break;
}
+ case ISD::AVGCEILU: {
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = Known.zext(BitWidth + 1);
+ Known2 = Known2.zext(BitWidth + 1);
+ KnownBits One = KnownBits::makeConstant(APInt(1, 1));
+ Known = KnownBits::computeForAddCarry(Known, Known2, One);
+ Known = Known.extractBits(BitWidth, 1);
+ break;
+ }
case ISD::SELECT:
case ISD::VSELECT:
Known = computeKnownBits(Op.getOperand(2), DemandedElts, Depth+1);
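Editorial sketch: the AVGCEILU case above computes known bits of (A + B + 1) >> 1 without losing the carry. Both operands are zero-extended by one bit, added with an incoming carry of one, and bits [1, BitWidth] of the sum are extracted. A standalone scalar model of the same trick:

    #include <cassert>
    #include <cstdint>

    static uint8_t avgCeilU8(uint8_t A, uint8_t B) {
      uint16_t Sum = uint16_t(A) + uint16_t(B) + 1; // zext + add-with-carry(1)
      return uint8_t(Sum >> 1);                     // extractBits(BitWidth, 1)
    }

    int main() {
      assert(avgCeilU8(3, 4) == 4);       // ceil(3.5)
      assert(avgCeilU8(255, 255) == 255); // no wrap thanks to the extra bit
      return 0;
    }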
@@ -3330,7 +3438,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
case ISD::ZERO_EXTEND_VECTOR_INREG: {
EVT InVT = Op.getOperand(0).getValueType();
- APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements());
+ APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
Known = Known.zext(BitWidth);
break;
@@ -3342,7 +3450,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
case ISD::SIGN_EXTEND_VECTOR_INREG: {
EVT InVT = Op.getOperand(0).getValueType();
- APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements());
+ APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
// If the sign bit is known to be zero or one, then sext will extend
// it to the top bits, else it will just zext.
@@ -3358,7 +3466,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
case ISD::ANY_EXTEND_VECTOR_INREG: {
EVT InVT = Op.getOperand(0).getValueType();
- APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements());
+ APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
Known = Known.anyext(BitWidth);
break;
@@ -3605,6 +3713,19 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = KnownBits::smax(Known, Known2);
else
Known = KnownBits::smin(Known, Known2);
+
+ // For SMAX, if CstLow is non-negative we know the result will be
+ // non-negative and thus all sign bits are 0.
+ // TODO: There's an equivalent of this for smin with negative constant for
+ // known ones.
+ if (IsMax && CstLow) {
+ const APInt &ValueLow = CstLow->getAPIntValue();
+ if (ValueLow.isNonNegative()) {
+ unsigned SignBits = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ Known.Zero.setHighBits(std::min(SignBits, ValueLow.getNumSignBits()));
+ }
+ }
+
break;
}
case ISD::FP_TO_UINT_SAT: {
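Editorial sketch: an exhaustive scalar check of the SMAX fact above. smax(X, C) with a non-negative C is itself non-negative, so at least the sign bit is known zero (the real code sets min(sign bits of X, sign bits of C) high zero bits; this sketch only checks the sign bit):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    int main() {
      const int8_t C = 3; // non-negative low bound fed to smax
      for (int X = -128; X <= 127; ++X) {
        int8_t M = std::max<int8_t>(int8_t(X), C);
        assert(M >= C);                   // never below the constant...
        assert((uint8_t(M) & 0x80) == 0); // ...so the sign bit is known zero
      }
      return 0;
    }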
@@ -3905,7 +4026,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
case ISD::SIGN_EXTEND_VECTOR_INREG: {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
- APInt DemandedSrcElts = DemandedElts.zextOrSelf(SrcVT.getVectorNumElements());
+ APInt DemandedSrcElts = DemandedElts.zext(SrcVT.getVectorNumElements());
Tmp = VTBits - SrcVT.getScalarSizeInBits();
return ComputeNumSignBits(Src, DemandedSrcElts, Depth+1) + Tmp;
}
@@ -4192,7 +4313,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
break;
uint64_t Idx = Op.getConstantOperandVal(1);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
- APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+ APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
return ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1);
}
case ISD::CONCAT_VECTORS: {
@@ -4585,26 +4706,54 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
return false;
}
+// Only bits set in Mask must be negated; other bits may be arbitrary.
+SDValue llvm::getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs) {
+ if (isBitwiseNot(V, AllowUndefs))
+ return V.getOperand(0);
+
+ // Handle any_extend (not (truncate X)) pattern, where Mask only sets
+ // bits in the non-extended part.
+ ConstantSDNode *MaskC = isConstOrConstSplat(Mask);
+ if (!MaskC || V.getOpcode() != ISD::ANY_EXTEND)
+ return SDValue();
+ SDValue ExtArg = V.getOperand(0);
+ if (ExtArg.getScalarValueSizeInBits() >=
+ MaskC->getAPIntValue().getActiveBits() &&
+ isBitwiseNot(ExtArg, AllowUndefs) &&
+ ExtArg.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+ ExtArg.getOperand(0).getOperand(0).getValueType() == V.getValueType())
+ return ExtArg.getOperand(0).getOperand(0);
+ return SDValue();
+}
+
+static bool haveNoCommonBitsSetCommutative(SDValue A, SDValue B) {
+ // Match the masked merge pattern (X & ~M) op (Y & M),
+ // including the degenerate case (X & ~M) op M.
+ auto MatchNoCommonBitsPattern = [&](SDValue Not, SDValue Mask,
+ SDValue Other) {
+ if (SDValue NotOperand =
+ getBitwiseNotOperand(Not, Mask, /* AllowUndefs */ true)) {
+ if (Other == NotOperand)
+ return true;
+ if (Other->getOpcode() == ISD::AND)
+ return NotOperand == Other->getOperand(0) ||
+ NotOperand == Other->getOperand(1);
+ }
+ return false;
+ };
+ if (A->getOpcode() == ISD::AND)
+ return MatchNoCommonBitsPattern(A->getOperand(0), A->getOperand(1), B) ||
+ MatchNoCommonBitsPattern(A->getOperand(1), A->getOperand(0), B);
+ return false;
+}
+
// FIXME: unify with llvm::haveNoCommonBitsSet.
bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
assert(A.getValueType() == B.getValueType() &&
"Values must have the same type");
- // Match masked merge pattern (X & ~M) op (Y & M)
- if (A->getOpcode() == ISD::AND && B->getOpcode() == ISD::AND) {
- auto MatchNoCommonBitsPattern = [&](SDValue NotM, SDValue And) {
- if (isBitwiseNot(NotM, true)) {
- SDValue NotOperand = NotM->getOperand(0);
- return NotOperand == And->getOperand(0) ||
- NotOperand == And->getOperand(1);
- }
- return false;
- };
- if (MatchNoCommonBitsPattern(A->getOperand(0), B) ||
- MatchNoCommonBitsPattern(A->getOperand(1), B) ||
- MatchNoCommonBitsPattern(B->getOperand(0), A) ||
- MatchNoCommonBitsPattern(B->getOperand(1), A))
- return true;
- }
+ if (haveNoCommonBitsSetCommutative(A, B) ||
+ haveNoCommonBitsSetCommutative(B, A))
+ return true;
return KnownBits::haveNoCommonBitsSet(computeKnownBits(A),
computeKnownBits(B));
}
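// Sketch of why the masked-merge shape accepted above is disjoint: each
// bit position is claimed by at most one of (X & ~M) and (Y & M), so a
// sum over them can never carry.
#include <cassert>
#include <cstdint>
static void sketchNoCommonBits(uint32_t X, uint32_t Y, uint32_t M) {
  assert(((X & ~M) & (Y & M)) == 0);          // holds for all X, Y, M
  assert(((X & ~M) + (Y & M)) == ((X & ~M) | (Y & M)));
}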
@@ -4833,9 +4982,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::CTTZ_ZERO_UNDEF:
return getConstant(Val.countTrailingZeros(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
- case ISD::FP16_TO_FP: {
+ case ISD::FP16_TO_FP:
+ case ISD::BF16_TO_FP: {
bool Ignored;
- APFloat FPV(APFloat::IEEEhalf(),
+ APFloat FPV(Opcode == ISD::FP16_TO_FP ? APFloat::IEEEhalf()
+ : APFloat::BFloat(),
(Val.getBitWidth() == 16) ? Val : Val.trunc(16));
// This can return overflow, underflow, or inexact; we don't care.
@@ -4909,11 +5060,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
break;
- case ISD::FP_TO_FP16: {
+ case ISD::FP_TO_FP16:
+ case ISD::FP_TO_BF16: {
bool Ignored;
// This can return overflow, underflow, or inexact; we don't care.
// FIXME need to be more flexible about rounding mode.
- (void)V.convert(APFloat::IEEEhalf(),
+ (void)V.convert(Opcode == ISD::FP_TO_FP16 ? APFloat::IEEEhalf()
+ : APFloat::BFloat(),
APFloat::rmNearestTiesToEven, &Ignored);
return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
}
@@ -4965,6 +5118,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
case ISD::FREEZE:
assert(VT == Operand.getValueType() && "Unexpected VT!");
+ if (isGuaranteedNotToBeUndefOrPoison(Operand))
+ return Operand;
break;
case ISD::TokenFactor:
case ISD::MERGE_VALUES:
@@ -5114,7 +5269,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(VT.isInteger() && VT == Operand.getValueType() &&
"Invalid ABS!");
if (OpOpcode == ISD::UNDEF)
- return getUNDEF(VT);
+ return getConstant(0, DL, VT);
break;
case ISD::BSWAP:
assert(VT.isInteger() && VT == Operand.getValueType() &&
@@ -5182,6 +5337,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (Operand.getValueType().getScalarType() == MVT::i1)
return getNOT(DL, Operand, Operand.getValueType());
break;
+ case ISD::VECREDUCE_ADD:
+ if (Operand.getValueType().getScalarType() == MVT::i1)
+ return getNode(ISD::VECREDUCE_XOR, DL, VT, Operand);
+ break;
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
if (Operand.getValueType().getScalarType() == MVT::i1)
@@ -5273,6 +5432,30 @@ static llvm::Optional<APInt> FoldValue(unsigned Opcode, const APInt &C1,
APInt C2Ext = C2.zext(FullWidth);
return (C1Ext * C2Ext).extractBits(C1.getBitWidth(), C1.getBitWidth());
}
+ case ISD::AVGFLOORS: {
+ unsigned FullWidth = C1.getBitWidth() + 1;
+ APInt C1Ext = C1.sext(FullWidth);
+ APInt C2Ext = C2.sext(FullWidth);
+ return (C1Ext + C2Ext).extractBits(C1.getBitWidth(), 1);
+ }
+ case ISD::AVGFLOORU: {
+ unsigned FullWidth = C1.getBitWidth() + 1;
+ APInt C1Ext = C1.zext(FullWidth);
+ APInt C2Ext = C2.zext(FullWidth);
+ return (C1Ext + C2Ext).extractBits(C1.getBitWidth(), 1);
+ }
+ case ISD::AVGCEILS: {
+ unsigned FullWidth = C1.getBitWidth() + 1;
+ APInt C1Ext = C1.sext(FullWidth);
+ APInt C2Ext = C2.sext(FullWidth);
+ return (C1Ext + C2Ext + 1).extractBits(C1.getBitWidth(), 1);
+ }
+ case ISD::AVGCEILU: {
+ unsigned FullWidth = C1.getBitWidth() + 1;
+ APInt C1Ext = C1.zext(FullWidth);
+ APInt C2Ext = C2.zext(FullWidth);
+ return (C1Ext + C2Ext + 1).extractBits(C1.getBitWidth(), 1);
+ }
}
return llvm::None;
}
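// Sketch of the AVG folds above in plain integer arithmetic: widening by
// one bit keeps the sum from wrapping, and extracting from bit 1 is the
// divide-by-two. E.g. avgflooru(255, 1) on i8 is 128, not 0.
#include <cassert>
#include <cstdint>
static void sketchAvgU(uint8_t A, uint8_t B) {
  uint16_t Wide = (uint16_t)A + (uint16_t)B;  // one extra bit, no wrap
  uint8_t Floor = (uint8_t)(Wide >> 1);       // AVGFLOORU
  uint8_t Ceil = (uint8_t)((Wide + 1) >> 1);  // AVGCEILU
  assert(Floor <= Ceil && Ceil - Floor <= 1);
}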
@@ -5355,7 +5538,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
if (!FoldAttempt)
return SDValue();
- SDValue Folded = getConstant(FoldAttempt.getValue(), DL, VT);
+ SDValue Folded = getConstant(*FoldAttempt, DL, VT);
assert((!Folded || !VT.isVector()) &&
"Can't fold vectors ops with scalar operands");
return Folded;
@@ -5400,7 +5583,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
Optional<APInt> Fold = FoldValue(Opcode, RawBits1[I], RawBits2[I]);
if (!Fold)
break;
- RawBits.push_back(Fold.getValue());
+ RawBits.push_back(*Fold);
}
if (RawBits.size() == NumElts.getFixedValue()) {
// We have constant folded, but we need to cast this again back to
@@ -5416,7 +5599,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
for (unsigned I = 0, E = DstBits.size(); I != E; ++I) {
if (DstUndefs[I])
continue;
- Ops[I] = getConstant(DstBits[I].sextOrSelf(BVEltBits), DL, BVEltVT);
+ Ops[I] = getConstant(DstBits[I].sext(BVEltBits), DL, BVEltVT);
}
return getBitcast(VT, getBuildVector(BVVT, DL, Ops));
}
@@ -5455,9 +5638,14 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
!llvm::all_of(Ops, IsScalarOrSameVectorSize))
return SDValue();
- // If we are comparing vectors, then the result needs to be a i1 boolean
- // that is then sign-extended back to the legal result type.
+ // If we are comparing vectors, then the result needs to be an i1 boolean that
+ // is then extended back to the legal result type depending on how booleans
+ // are represented.
EVT SVT = (Opcode == ISD::SETCC ? MVT::i1 : VT.getScalarType());
+ ISD::NodeType ExtendCode =
+ (Opcode == ISD::SETCC && SVT != VT.getScalarType())
+ ? TargetLowering::getExtendForContent(TLI->getBooleanContents(VT))
+ : ISD::SIGN_EXTEND;
// Find legal integer scalar type for constant promotion and
// ensure that its scalar size is at least as large as source.
@@ -5515,7 +5703,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
// Legalize the (integer) scalar constant if necessary.
if (LegalSVT != SVT)
- ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult);
+ ScalarResult = getNode(ExtendCode, DL, LegalSVT, ScalarResult);
// Scalar folding only succeeded if the result is a constant or UNDEF.
if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant &&
@@ -5639,20 +5827,34 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return getNode(Opcode, DL, VT, N1, N2, Flags);
}
+void SelectionDAG::canonicalizeCommutativeBinop(unsigned Opcode, SDValue &N1,
+ SDValue &N2) const {
+ if (!TLI->isCommutativeBinOp(Opcode))
+ return;
+
+ // Canonicalize:
+ // binop(const, nonconst) -> binop(nonconst, const)
+ bool IsN1C = isConstantIntBuildVectorOrConstantInt(N1);
+ bool IsN2C = isConstantIntBuildVectorOrConstantInt(N2);
+ bool IsN1CFP = isConstantFPBuildVectorOrConstantFP(N1);
+ bool IsN2CFP = isConstantFPBuildVectorOrConstantFP(N2);
+ if ((IsN1C && !IsN2C) || (IsN1CFP && !IsN2CFP))
+ std::swap(N1, N2);
+
+ // Canonicalize:
+ // binop(splat(x), step_vector) -> binop(step_vector, splat(x))
+ else if (N1.getOpcode() == ISD::SPLAT_VECTOR &&
+ N2.getOpcode() == ISD::STEP_VECTOR)
+ std::swap(N1, N2);
+}
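// Minimal sketch of what the canonicalization above buys (the names are
// hypothetical): with constants and step_vectors pushed to one side,
// later combines only need to match a single operand order.
#include <utility>
static void sketchCanonicalize(bool N1IsConst, bool N2IsConst, int &N1,
                               int &N2) {
  if (N1IsConst && !N2IsConst)
    std::swap(N1, N2);                        // const always ends up on RHS
}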
+
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue N1, SDValue N2, const SDNodeFlags Flags) {
assert(N1.getOpcode() != ISD::DELETED_NODE &&
N2.getOpcode() != ISD::DELETED_NODE &&
"Operand is DELETED_NODE!");
- // Canonicalize constant to RHS if commutative.
- if (TLI->isCommutativeBinOp(Opcode)) {
- bool IsN1C = isConstantIntBuildVectorOrConstantInt(N1);
- bool IsN2C = isConstantIntBuildVectorOrConstantInt(N2);
- bool IsN1CFP = isConstantFPBuildVectorOrConstantFP(N1);
- bool IsN2CFP = isConstantFPBuildVectorOrConstantFP(N2);
- if ((IsN1C && !IsN2C) || (IsN1CFP && !IsN2CFP))
- std::swap(N1, N2);
- }
+
+ canonicalizeCommutativeBinop(Opcode, N1, N2);
auto *N1C = dyn_cast<ConstantSDNode>(N1);
auto *N2C = dyn_cast<ConstantSDNode>(N2);
@@ -5956,6 +6158,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (N1Op2C->getZExtValue() == N2C->getZExtValue()) {
if (VT == N1.getOperand(1).getValueType())
return N1.getOperand(1);
+ if (VT.isFloatingPoint()) {
+ assert(VT.getSizeInBits() > N1.getOperand(1).getValueType().getSizeInBits());
+ return getFPExtendOrRound(N1.getOperand(1), DL, VT);
+ }
return getSExtOrTrunc(N1.getOperand(1), DL, VT);
}
return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2);
@@ -6053,9 +6259,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
std::swap(N1, N2);
} else {
switch (Opcode) {
- case ISD::SIGN_EXTEND_INREG:
case ISD::SUB:
return getUNDEF(VT); // fold op(undef, arg2) -> undef
+ case ISD::SIGN_EXTEND_INREG:
case ISD::UDIV:
case ISD::SDIV:
case ISD::UREM:
@@ -6544,7 +6750,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!TRI->hasStackRealignment(MF))
while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
- NewAlign = NewAlign / 2;
+ NewAlign = NewAlign.previous();
if (NewAlign > Alignment) {
// Give the stack frame object a larger alignment if needed.
@@ -6792,17 +6998,18 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
/// \param Size Number of bytes to write.
/// \param Alignment Alignment of the destination in bytes.
/// \param isVol True if destination is volatile.
+/// \param AlwaysInline Makes sure no function call is generated.
/// \param DstPtrInfo IR information on the memory pointer.
/// \returns New head in the control flow, if lowering was successful, empty
/// SDValue otherwise.
///
/// The function tries to replace 'llvm.memset' intrinsic with several store
/// operations and value calculation code. This is usually profitable for small
-/// memory size.
+/// memory size or when the semantics require inlining.
static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
uint64_t Size, Align Alignment, bool isVol,
- MachinePointerInfo DstPtrInfo,
+ bool AlwaysInline, MachinePointerInfo DstPtrInfo,
const AAMDNodes &AAInfo) {
// Turn a memset of undef to nop.
// FIXME: We need to honor volatile even if Src is undef.
@@ -6822,8 +7029,10 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
DstAlignCanChange = true;
bool IsZeroVal =
isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isZero();
+ unsigned Limit = AlwaysInline ? ~0 : TLI.getMaxStoresPerMemset(OptSize);
+
if (!TLI.findOptimalMemOpLowering(
- MemOps, TLI.getMaxStoresPerMemset(OptSize),
+ MemOps, Limit,
MemOp::Set(Size, DstAlignCanChange, Alignment, IsZeroVal, isVol),
DstPtrInfo.getAddrSpace(), ~0u, MF.getFunction().getAttributes()))
return SDValue();
@@ -6974,10 +7183,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
}
SDValue SelectionDAG::getAtomicMemcpy(SDValue Chain, const SDLoc &dl,
- SDValue Dst, unsigned DstAlign,
- SDValue Src, unsigned SrcAlign,
- SDValue Size, Type *SizeTy,
- unsigned ElemSz, bool isTailCall,
+ SDValue Dst, SDValue Src, SDValue Size,
+ Type *SizeTy, unsigned ElemSz,
+ bool isTailCall,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
// Emit a library call.
@@ -7077,10 +7285,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
}
SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl,
- SDValue Dst, unsigned DstAlign,
- SDValue Src, unsigned SrcAlign,
- SDValue Size, Type *SizeTy,
- unsigned ElemSz, bool isTailCall,
+ SDValue Dst, SDValue Src, SDValue Size,
+ Type *SizeTy, unsigned ElemSz,
+ bool isTailCall,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
// Emit a library call.
@@ -7119,7 +7326,7 @@ SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl,
SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
SDValue Src, SDValue Size, Align Alignment,
- bool isVol, bool isTailCall,
+ bool isVol, bool AlwaysInline, bool isTailCall,
MachinePointerInfo DstPtrInfo,
const AAMDNodes &AAInfo) {
// Check to see if we should lower the memset to stores first.
@@ -7132,7 +7339,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src,
ConstantSize->getZExtValue(), Alignment,
- isVol, DstPtrInfo, AAInfo);
+ isVol, false, DstPtrInfo, AAInfo);
if (Result.getNode())
return Result;
@@ -7142,45 +7349,75 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
// code. If the target chooses to do this, this is the next best.
if (TSI) {
SDValue Result = TSI->EmitTargetCodeForMemset(
- *this, dl, Chain, Dst, Src, Size, Alignment, isVol, DstPtrInfo);
+ *this, dl, Chain, Dst, Src, Size, Alignment, isVol, AlwaysInline, DstPtrInfo);
if (Result.getNode())
return Result;
}
+ // If we really need inline code and the target declined to provide it,
+ // use a (potentially long) sequence of stores.
+ if (AlwaysInline) {
+ assert(ConstantSize && "AlwaysInline requires a constant size!");
+ SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(), Alignment,
+ isVol, true, DstPtrInfo, AAInfo);
+ assert(Result &&
+ "getMemsetStores must return a valid sequence when AlwaysInline");
+ return Result;
+ }
+
checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
// Emit a library call.
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- Entry.Node = Dst; Entry.Ty = Type::getInt8PtrTy(*getContext());
- Args.push_back(Entry);
- Entry.Node = Src;
- Entry.Ty = Src.getValueType().getTypeForEVT(*getContext());
- Args.push_back(Entry);
- Entry.Node = Size;
- Entry.Ty = getDataLayout().getIntPtrType(*getContext());
- Args.push_back(Entry);
+ auto &Ctx = *getContext();
+ const auto &DL = getDataLayout();
- // FIXME: pass in SDLoc
TargetLowering::CallLoweringInfo CLI(*this);
- CLI.setDebugLoc(dl)
- .setChain(Chain)
- .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET),
- Dst.getValueType().getTypeForEVT(*getContext()),
- getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET),
- TLI->getPointerTy(getDataLayout())),
- std::move(Args))
- .setDiscardResult()
- .setTailCall(isTailCall);
+ // FIXME: pass in SDLoc
+ CLI.setDebugLoc(dl).setChain(Chain);
+
+ ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src);
+ const bool SrcIsZero = ConstantSrc && ConstantSrc->isZero();
+ const char *BzeroName = getTargetLoweringInfo().getLibcallName(RTLIB::BZERO);
+
+ // Helper function to create an Entry from Node and Type.
+ const auto CreateEntry = [](SDValue Node, Type *Ty) {
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Node;
+ Entry.Ty = Ty;
+ return Entry;
+ };
- std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
+ // If zeroing out and bzero is present, use it.
+ if (SrcIsZero && BzeroName) {
+ TargetLowering::ArgListTy Args;
+ Args.push_back(CreateEntry(Dst, Type::getInt8PtrTy(Ctx)));
+ Args.push_back(CreateEntry(Size, DL.getIntPtrType(Ctx)));
+ CLI.setLibCallee(
+ TLI->getLibcallCallingConv(RTLIB::BZERO), Type::getVoidTy(Ctx),
+ getExternalSymbol(BzeroName, TLI->getPointerTy(DL)), std::move(Args));
+ } else {
+ TargetLowering::ArgListTy Args;
+ Args.push_back(CreateEntry(Dst, Type::getInt8PtrTy(Ctx)));
+ Args.push_back(CreateEntry(Src, Src.getValueType().getTypeForEVT(Ctx)));
+ Args.push_back(CreateEntry(Size, DL.getIntPtrType(Ctx)));
+ CLI.setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET),
+ Dst.getValueType().getTypeForEVT(Ctx),
+ getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET),
+ TLI->getPointerTy(DL)),
+ std::move(Args));
+ }
+
+ CLI.setDiscardResult().setTailCall(isTailCall);
+
+ std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
}
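// Sketch of the libcall choice above in miniature (an assumption: bzero
// availability is modeled here as a plain flag, where the real code asks
// for the RTLIB::BZERO name): a zero-valued memset prefers bzero(dst, n)
// when the target names that libcall, and falls back to memset(dst, c, n).
#include <cstddef>
#include <cstring>
static void sketchMemsetLowering(void *Dst, int C, size_t N,
                                 bool HaveBzero) {
  if (C == 0 && HaveBzero)
    memset(Dst, 0, N);   // stand-in for bzero, which takes no value arg
  else
    memset(Dst, C, N);
}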
SDValue SelectionDAG::getAtomicMemset(SDValue Chain, const SDLoc &dl,
- SDValue Dst, unsigned DstAlign,
- SDValue Value, SDValue Size, Type *SizeTy,
- unsigned ElemSz, bool isTailCall,
+ SDValue Dst, SDValue Value, SDValue Size,
+ Type *SizeTy, unsigned ElemSz,
+ bool isTailCall,
MachinePointerInfo DstPtrInfo) {
// Emit a library call.
TargetLowering::ArgListTy Args;
@@ -7224,6 +7461,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
ID.AddInteger(MemVT.getRawBits());
AddNodeIDNode(ID, Opcode, VTList, Ops);
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
void* IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<AtomicSDNode>(E)->refineAlignment(MMO);
@@ -7336,6 +7574,7 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
ID.AddInteger(getSyntheticNodeSubclassData<MemIntrinsicSDNode>(
Opcode, dl.getIROrder(), VTList, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO);
@@ -7508,6 +7747,7 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
ID.AddInteger(getSyntheticNodeSubclassData<LoadSDNode>(
dl.getIROrder(), VTs, AM, ExtType, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<LoadSDNode>(E)->refineAlignment(MMO);
@@ -7609,6 +7849,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
ID.AddInteger(getSyntheticNodeSubclassData<StoreSDNode>(
dl.getIROrder(), VTs, ISD::UNINDEXED, false, VT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
@@ -7675,6 +7916,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
ID.AddInteger(getSyntheticNodeSubclassData<StoreSDNode>(
dl.getIROrder(), VTs, ISD::UNINDEXED, true, SVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
@@ -7703,6 +7945,7 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,
ID.AddInteger(ST->getMemoryVT().getRawBits());
ID.AddInteger(ST->getRawSubclassData());
ID.AddInteger(ST->getPointerInfo().getAddrSpace());
+ ID.AddInteger(ST->getMemOperand()->getFlags());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
return SDValue(E, 0);
@@ -7760,6 +8003,7 @@ SDValue SelectionDAG::getLoadVP(ISD::MemIndexedMode AM,
ID.AddInteger(getSyntheticNodeSubclassData<VPLoadSDNode>(
dl.getIROrder(), VTs, AM, ExtType, IsExpanding, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<VPLoadSDNode>(E)->refineAlignment(MMO);
@@ -7852,6 +8096,7 @@ SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val,
ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>(
dl.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<VPStoreSDNode>(E)->refineAlignment(MMO);
@@ -7922,6 +8167,7 @@ SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl,
ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>(
dl.getIROrder(), VTs, ISD::UNINDEXED, true, IsCompressing, SVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<VPStoreSDNode>(E)->refineAlignment(MMO);
@@ -7952,6 +8198,7 @@ SDValue SelectionDAG::getIndexedStoreVP(SDValue OrigStore, const SDLoc &dl,
ID.AddInteger(ST->getMemoryVT().getRawBits());
ID.AddInteger(ST->getRawSubclassData());
ID.AddInteger(ST->getPointerInfo().getAddrSpace());
+ ID.AddInteger(ST->getMemOperand()->getFlags());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
return SDValue(E, 0);
@@ -7968,6 +8215,259 @@ SDValue SelectionDAG::getIndexedStoreVP(SDValue OrigStore, const SDLoc &dl,
return V;
}
+SDValue SelectionDAG::getStridedLoadVP(
+ ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL,
+ SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask,
+ SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment,
+ MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,
+ const MDNode *Ranges, bool IsExpanding) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+
+ MMOFlags |= MachineMemOperand::MOLoad;
+ assert((MMOFlags & MachineMemOperand::MOStore) == 0);
+ // If we don't have a PtrInfo, infer the trivial frame index case to simplify
+ // clients.
+ if (PtrInfo.V.isNull())
+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);
+
+ uint64_t Size = MemoryLocation::UnknownSize;
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size,
+ Alignment, AAInfo, Ranges);
+ return getStridedLoadVP(AM, ExtType, VT, DL, Chain, Ptr, Offset, Stride, Mask,
+ EVL, MemVT, MMO, IsExpanding);
+}
+
+SDValue SelectionDAG::getStridedLoadVP(
+ ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL,
+ SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask,
+ SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding) {
+ bool Indexed = AM != ISD::UNINDEXED;
+ assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!");
+
+ SDValue Ops[] = {Chain, Ptr, Offset, Stride, Mask, EVL};
+ SDVTList VTs = Indexed ? getVTList(VT, Ptr.getValueType(), MVT::Other)
+ : getVTList(VT, MVT::Other);
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_LOAD, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPStridedLoadSDNode>(
+ DL.getIROrder(), VTs, AM, ExtType, IsExpanding, MemVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
+ cast<VPStridedLoadSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+
+ auto *N =
+ newSDNode<VPStridedLoadSDNode>(DL.getIROrder(), DL.getDebugLoc(), VTs, AM,
+ ExtType, IsExpanding, MemVT, MMO);
+ createOperands(N, Ops);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
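// Sketch of the find-or-create discipline above, with a std::map standing
// in for the FoldingSet: every field that affects node identity goes into
// the key, and a node is only allocated on a miss.
#include <map>
#include <memory>
#include <tuple>
using SketchKey = std::tuple<int /*Opcode*/, int /*AddrSpace*/>;
static int *findOrInsert(std::map<SketchKey, std::unique_ptr<int>> &CSEMap,
                         int Opcode, int AddrSpace) {
  SketchKey Key{Opcode, AddrSpace};
  auto It = CSEMap.find(Key);
  if (It != CSEMap.end())
    return It->second.get();                  // CSE hit: reuse
  It = CSEMap.emplace(Key, std::make_unique<int>(Opcode)).first;
  return It->second.get();                    // miss: create and remember
}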
+
+SDValue SelectionDAG::getStridedLoadVP(
+ EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Stride,
+ SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, MaybeAlign Alignment,
+ MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,
+ const MDNode *Ranges, bool IsExpanding) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getStridedLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, DL, Chain, Ptr,
+ Undef, Stride, Mask, EVL, PtrInfo, VT, Alignment,
+ MMOFlags, AAInfo, Ranges, IsExpanding);
+}
+
+SDValue SelectionDAG::getStridedLoadVP(EVT VT, const SDLoc &DL, SDValue Chain,
+ SDValue Ptr, SDValue Stride,
+ SDValue Mask, SDValue EVL,
+ MachineMemOperand *MMO,
+ bool IsExpanding) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getStridedLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, DL, Chain, Ptr,
+ Undef, Stride, Mask, EVL, VT, MMO, IsExpanding);
+}
+
+SDValue SelectionDAG::getExtStridedLoadVP(
+ ISD::LoadExtType ExtType, const SDLoc &DL, EVT VT, SDValue Chain,
+ SDValue Ptr, SDValue Stride, SDValue Mask, SDValue EVL,
+ MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment,
+ MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,
+ bool IsExpanding) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getStridedLoadVP(ISD::UNINDEXED, ExtType, VT, DL, Chain, Ptr, Undef,
+ Stride, Mask, EVL, PtrInfo, MemVT, Alignment,
+ MMOFlags, AAInfo, nullptr, IsExpanding);
+}
+
+SDValue SelectionDAG::getExtStridedLoadVP(
+ ISD::LoadExtType ExtType, const SDLoc &DL, EVT VT, SDValue Chain,
+ SDValue Ptr, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT,
+ MachineMemOperand *MMO, bool IsExpanding) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getStridedLoadVP(ISD::UNINDEXED, ExtType, VT, DL, Chain, Ptr, Undef,
+ Stride, Mask, EVL, MemVT, MMO, IsExpanding);
+}
+
+SDValue SelectionDAG::getIndexedStridedLoadVP(SDValue OrigLoad, const SDLoc &DL,
+ SDValue Base, SDValue Offset,
+ ISD::MemIndexedMode AM) {
+ auto *SLD = cast<VPStridedLoadSDNode>(OrigLoad);
+ assert(SLD->getOffset().isUndef() &&
+ "Strided load is already an indexed load!");
+ // Don't propagate the invariant or dereferenceable flags.
+ auto MMOFlags =
+ SLD->getMemOperand()->getFlags() &
+ ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
+ return getStridedLoadVP(
+ AM, SLD->getExtensionType(), OrigLoad.getValueType(), DL, SLD->getChain(),
+ Base, Offset, SLD->getStride(), SLD->getMask(), SLD->getVectorLength(),
+ SLD->getPointerInfo(), SLD->getMemoryVT(), SLD->getAlign(), MMOFlags,
+ SLD->getAAInfo(), nullptr, SLD->isExpandingLoad());
+}
+
+SDValue SelectionDAG::getStridedStoreVP(SDValue Chain, const SDLoc &DL,
+ SDValue Val, SDValue Ptr,
+ SDValue Offset, SDValue Stride,
+ SDValue Mask, SDValue EVL, EVT MemVT,
+ MachineMemOperand *MMO,
+ ISD::MemIndexedMode AM,
+ bool IsTruncating, bool IsCompressing) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+ bool Indexed = AM != ISD::UNINDEXED;
+ assert((Indexed || Offset.isUndef()) && "Unindexed vp_store with an offset!");
+ SDVTList VTs = Indexed ? getVTList(Ptr.getValueType(), MVT::Other)
+ : getVTList(MVT::Other);
+ SDValue Ops[] = {Chain, Val, Ptr, Offset, Stride, Mask, EVL};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_STORE, VTs, Ops);
+ ID.AddInteger(MemVT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPStridedStoreSDNode>(
+ DL.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
+ cast<VPStridedStoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<VPStridedStoreSDNode>(DL.getIROrder(), DL.getDebugLoc(),
+ VTs, AM, IsTruncating,
+ IsCompressing, MemVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getTruncStridedStoreVP(
+ SDValue Chain, const SDLoc &DL, SDValue Val, SDValue Ptr, SDValue Stride,
+ SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT SVT,
+ Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,
+ bool IsCompressing) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+
+ MMOFlags |= MachineMemOperand::MOStore;
+ assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
+
+ if (PtrInfo.V.isNull())
+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
+
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ PtrInfo, MMOFlags, MemoryLocation::UnknownSize, Alignment, AAInfo);
+ return getTruncStridedStoreVP(Chain, DL, Val, Ptr, Stride, Mask, EVL, SVT,
+ MMO, IsCompressing);
+}
+
+SDValue SelectionDAG::getTruncStridedStoreVP(SDValue Chain, const SDLoc &DL,
+ SDValue Val, SDValue Ptr,
+ SDValue Stride, SDValue Mask,
+ SDValue EVL, EVT SVT,
+ MachineMemOperand *MMO,
+ bool IsCompressing) {
+ EVT VT = Val.getValueType();
+
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+ if (VT == SVT)
+ return getStridedStoreVP(Chain, DL, Val, Ptr, getUNDEF(Ptr.getValueType()),
+ Stride, Mask, EVL, VT, MMO, ISD::UNINDEXED,
+ /*IsTruncating*/ false, IsCompressing);
+
+ assert(SVT.getScalarType().bitsLT(VT.getScalarType()) &&
+ "Should only be a truncating store, not extending!");
+ assert(VT.isInteger() == SVT.isInteger() && "Can't do FP-INT conversion!");
+ assert(VT.isVector() == SVT.isVector() &&
+ "Cannot use trunc store to convert to or from a vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorElementCount() == SVT.getVectorElementCount()) &&
+ "Cannot use trunc store to change the number of vector elements!");
+
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = {Chain, Val, Ptr, Undef, Stride, Mask, EVL};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_STORE, VTs, Ops);
+ ID.AddInteger(SVT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPStridedStoreSDNode>(
+ DL.getIROrder(), VTs, ISD::UNINDEXED, true, IsCompressing, SVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
+ cast<VPStridedStoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<VPStridedStoreSDNode>(DL.getIROrder(), DL.getDebugLoc(),
+ VTs, ISD::UNINDEXED, true,
+ IsCompressing, SVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getIndexedStridedStoreVP(SDValue OrigStore,
+ const SDLoc &DL, SDValue Base,
+ SDValue Offset,
+ ISD::MemIndexedMode AM) {
+ auto *SST = cast<VPStridedStoreSDNode>(OrigStore);
+ assert(SST->getOffset().isUndef() &&
+ "Strided store is already an indexed store!");
+ SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
+ SDValue Ops[] = {
+ SST->getChain(), SST->getValue(), Base, Offset, SST->getStride(),
+ SST->getMask(), SST->getVectorLength()};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_STORE, VTs, Ops);
+ ID.AddInteger(SST->getMemoryVT().getRawBits());
+ ID.AddInteger(SST->getRawSubclassData());
+ ID.AddInteger(SST->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<VPStridedStoreSDNode>(
+ DL.getIROrder(), DL.getDebugLoc(), VTs, AM, SST->isTruncatingStore(),
+ SST->isCompressingStore(), SST->getMemoryVT(), SST->getMemOperand());
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
SDValue SelectionDAG::getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
ISD::MemIndexType IndexType) {
@@ -7979,6 +8479,7 @@ SDValue SelectionDAG::getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl,
ID.AddInteger(getSyntheticNodeSubclassData<VPGatherSDNode>(
dl.getIROrder(), VTs, VT, MMO, IndexType));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<VPGatherSDNode>(E)->refineAlignment(MMO);
@@ -8022,6 +8523,7 @@ SDValue SelectionDAG::getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl,
ID.AddInteger(getSyntheticNodeSubclassData<VPScatterSDNode>(
dl.getIROrder(), VTs, VT, MMO, IndexType));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<VPScatterSDNode>(E)->refineAlignment(MMO);
@@ -8071,6 +8573,7 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
ID.AddInteger(getSyntheticNodeSubclassData<MaskedLoadSDNode>(
dl.getIROrder(), VTs, AM, ExtTy, isExpanding, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<MaskedLoadSDNode>(E)->refineAlignment(MMO);
@@ -8118,6 +8621,7 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
ID.AddInteger(getSyntheticNodeSubclassData<MaskedStoreSDNode>(
dl.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<MaskedStoreSDNode>(E)->refineAlignment(MMO);
@@ -8159,13 +8663,13 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl,
ID.AddInteger(getSyntheticNodeSubclassData<MaskedGatherSDNode>(
dl.getIROrder(), VTs, MemVT, MMO, IndexType, ExtTy));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<MaskedGatherSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
- IndexType = TLI->getCanonicalIndexType(IndexType, MemVT, Ops[4]);
auto *N = newSDNode<MaskedGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(),
VTs, MemVT, MMO, IndexType, ExtTy);
createOperands(N, Ops);
@@ -8206,13 +8710,13 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl,
ID.AddInteger(getSyntheticNodeSubclassData<MaskedScatterSDNode>(
dl.getIROrder(), VTs, MemVT, MMO, IndexType, IsTrunc));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<MaskedScatterSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
- IndexType = TLI->getCanonicalIndexType(IndexType, MemVT, Ops[4]);
auto *N = newSDNode<MaskedScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(),
VTs, MemVT, MMO, IndexType, IsTrunc);
createOperands(N, Ops);
@@ -8410,6 +8914,41 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(Ops[2].getValueType() == Ops[3].getValueType() &&
"LHS/RHS of comparison should match types!");
break;
+ case ISD::VP_ADD:
+ case ISD::VP_SUB:
+ // If this is a VP_ADD/VP_SUB mask operation, turn it into VP_XOR.
+ if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
+ Opcode = ISD::VP_XOR;
+ break;
+ case ISD::VP_MUL:
+ // If this is a VP_MUL mask operation, turn it into VP_AND.
+ if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
+ Opcode = ISD::VP_AND;
+ break;
+ case ISD::VP_REDUCE_MUL:
+ // If this is a VP_REDUCE_MUL mask operation, turn it into VP_REDUCE_AND.
+ if (VT == MVT::i1)
+ Opcode = ISD::VP_REDUCE_AND;
+ break;
+ case ISD::VP_REDUCE_ADD:
+ // If this is a VP_REDUCE_ADD mask operation, turn it into VP_REDUCE_XOR.
+ if (VT == MVT::i1)
+ Opcode = ISD::VP_REDUCE_XOR;
+ break;
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_UMIN:
+ // If this is a VP_REDUCE_SMAX/VP_REDUCE_UMIN mask operation, turn it
+ // into VP_REDUCE_AND.
+ if (VT == MVT::i1)
+ Opcode = ISD::VP_REDUCE_AND;
+ break;
+ case ISD::VP_REDUCE_SMIN:
+ case ISD::VP_REDUCE_UMAX:
+ // If this is a VP_REDUCE_SMIN/VP_REDUCE_UMAX mask operation, turn it
+ // into VP_REDUCE_OR.
+ if (VT == MVT::i1)
+ Opcode = ISD::VP_REDUCE_OR;
+ break;
}
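// Sketch of the single-bit identities the rewrites above rely on: in i1,
// add and sub are xor (1 + 1 wraps to 0) and mul is and; the reduction
// rewrites are the same identities folded across a vector.
#include <cassert>
static void sketchI1Identities(bool A, bool B) {
  assert(((A + B) & 1) == (A ^ B));           // VP_ADD  -> VP_XOR
  assert(((A - B) & 1) == (A ^ B));           // VP_SUB  -> VP_XOR
  assert((A * B) == (A & B));                 // VP_MUL  -> VP_AND
}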
// Memoize nodes.
@@ -8456,7 +8995,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
ArrayRef<SDValue> Ops, const SDNodeFlags Flags) {
if (VTList.NumVTs == 1)
- return getNode(Opcode, DL, VTList.VTs[0], Ops);
+ return getNode(Opcode, DL, VTList.VTs[0], Ops, Flags);
#ifndef NDEBUG
for (auto &Op : Ops)
@@ -9669,19 +10208,36 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){
namespace {
- /// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith
- /// to record information about a use.
- struct UseMemo {
- SDNode *User;
- unsigned Index;
- SDUse *Use;
- };
+/// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith
+/// to record information about a use.
+struct UseMemo {
+ SDNode *User;
+ unsigned Index;
+ SDUse *Use;
+};
- /// operator< - Sort Memos by User.
- bool operator<(const UseMemo &L, const UseMemo &R) {
- return (intptr_t)L.User < (intptr_t)R.User;
+/// operator< - Sort Memos by User.
+bool operator<(const UseMemo &L, const UseMemo &R) {
+ return (intptr_t)L.User < (intptr_t)R.User;
+}
+
+/// RAUOVWUpdateListener - Helper for ReplaceAllUsesOfValuesWith - When the node
+/// pointed to by a UseMemo is deleted, set the User to nullptr to indicate that
+/// the node has already been taken care of recursively.
+class RAUOVWUpdateListener : public SelectionDAG::DAGUpdateListener {
+ SmallVector<UseMemo, 4> &Uses;
+
+ void NodeDeleted(SDNode *N, SDNode *E) override {
+ for (UseMemo &Memo : Uses)
+ if (Memo.User == N)
+ Memo.User = nullptr;
}
+public:
+ RAUOVWUpdateListener(SelectionDAG &d, SmallVector<UseMemo, 4> &uses)
+ : SelectionDAG::DAGUpdateListener(d), Uses(uses) {}
+};
+
} // end anonymous namespace
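// Sketch of the listener discipline above: while uses are being rewritten,
// a deletion callback nulls out stale UseMemo entries so the main loop can
// skip them instead of dereferencing a freed node.
#include <vector>
struct SketchUse { void *User = nullptr; };
static void sketchNodeDeleted(std::vector<SketchUse> &Uses, void *Deleted) {
  for (SketchUse &U : Uses)
    if (U.User == Deleted)
      U.User = nullptr;                       // later iterations skip this
}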
bool SelectionDAG::calculateDivergence(SDNode *N) {
@@ -9773,12 +10329,19 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
// Sort the uses, so that all the uses from a given User are together.
llvm::sort(Uses);
+ RAUOVWUpdateListener Listener(*this, Uses);
for (unsigned UseIndex = 0, UseIndexEnd = Uses.size();
UseIndex != UseIndexEnd; ) {
// We know that this user uses some value of From. If it is the right
// value, update it.
SDNode *User = Uses[UseIndex].User;
+ // If the node has been deleted by recursive CSE updates when updating
+ // another node, then just skip this entry.
+ if (User == nullptr) {
+ ++UseIndex;
+ continue;
+ }
// This node is about to morph, remove its old self from the CSE maps.
RemoveNodeFromCSEMaps(User);
@@ -9975,6 +10538,11 @@ bool llvm::isOneConstant(SDValue V) {
return Const != nullptr && Const->isOne();
}
+bool llvm::isMinSignedConstant(SDValue V) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isMinSignedValue();
+}
+
SDValue llvm::peekThroughBitcasts(SDValue V) {
while (V.getOpcode() == ISD::BITCAST)
V = V.getOperand(0);
@@ -10105,10 +10673,9 @@ bool llvm::isNullOrNullSplat(SDValue N, bool AllowUndefs) {
}
bool llvm::isOneOrOneSplat(SDValue N, bool AllowUndefs) {
- // TODO: may want to use peekThroughBitcast() here.
- unsigned BitWidth = N.getScalarValueSizeInBits();
- ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs);
- return C && C->isOne() && C->getValueSizeInBits(0) == BitWidth;
+ ConstantSDNode *C =
+ isConstOrConstSplat(N, AllowUndefs, /*AllowTruncation*/ true);
+ return C && C->isOne();
}
bool llvm::isAllOnesOrAllOnesSplat(SDValue N, bool AllowUndefs) {
@@ -10957,9 +11524,8 @@ bool BuildVectorSDNode::getConstantRawBits(
auto *CInt = dyn_cast<ConstantSDNode>(Op);
auto *CFP = dyn_cast<ConstantFPSDNode>(Op);
assert((CInt || CFP) && "Unknown constant");
- SrcBitElements[I] =
- CInt ? CInt->getAPIntValue().truncOrSelf(SrcEltSizeInBits)
- : CFP->getValueAPF().bitcastToAPInt();
+ SrcBitElements[I] = CInt ? CInt->getAPIntValue().trunc(SrcEltSizeInBits)
+ : CFP->getValueAPF().bitcastToAPInt();
}
// Recast to dst width.
@@ -11078,6 +11644,10 @@ SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) const {
if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
return N.getNode();
+ if ((N.getOpcode() == ISD::SPLAT_VECTOR) &&
+ isa<ConstantFPSDNode>(N.getOperand(0)))
+ return N.getNode();
+
return nullptr;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index 6d8252046501..d236433f6fb4 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -96,7 +96,7 @@ bool BaseIndexOffset::computeAliasing(const SDNode *Op0,
if (!(BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()))
return false;
int64_t PtrDiff;
- if (NumBytes0.hasValue() && NumBytes1.hasValue() &&
+ if (NumBytes0 && NumBytes1 &&
BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) {
// If the size of the memory access is unknown, do not use it in the analysis.
// One example of unknown size memory access is to load/store scalable
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index c61716ba1676..37d05cdba76d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -24,25 +24,21 @@
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
-#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineInstrBundleIterator.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
@@ -89,7 +85,6 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -102,10 +97,8 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cstddef>
-#include <cstring>
#include <iterator>
#include <limits>
-#include <numeric>
#include <tuple>
using namespace llvm;
@@ -224,10 +217,10 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
std::swap(Lo, Hi);
EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
- Hi =
- DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
- DAG.getConstant(Lo.getValueSizeInBits(), DL,
- TLI.getPointerTy(DAG.getDataLayout())));
+ Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
+ DAG.getConstant(Lo.getValueSizeInBits(), DL,
+ TLI.getShiftAmountTy(
+ TotalVT, DAG.getDataLayout())));
Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
}
@@ -276,7 +269,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
// For a truncate, see if we have any information to
// indicate whether the truncated bits will always be
// zero or sign-extension.
- if (AssertOp.hasValue())
+ if (AssertOp)
Val = DAG.getNode(*AssertOp, DL, PartEVT, Val,
DAG.getValueType(ValueVT));
return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
@@ -330,7 +323,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
Optional<CallingConv::ID> CallConv) {
assert(ValueVT.isVector() && "Not a vector value");
assert(NumParts > 0 && "No parts to assemble!");
- const bool IsABIRegCopy = CallConv.hasValue();
+ const bool IsABIRegCopy = CallConv.has_value();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Val = Parts[0];
@@ -344,7 +337,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
if (IsABIRegCopy) {
NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
- *DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT,
+ *DAG.getContext(), *CallConv, ValueVT, IntermediateVT,
NumIntermediates, RegisterVT);
} else {
NumRegs =
@@ -566,7 +559,7 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
unsigned RoundBits = RoundParts * PartBits;
unsigned OddParts = NumParts - RoundParts;
SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
- DAG.getShiftAmountConstant(RoundBits, ValueVT, DL, /*LegalTypes*/false));
+ DAG.getShiftAmountConstant(RoundBits, ValueVT, DL));
getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V,
CallConv);
@@ -654,7 +647,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
EVT ValueVT = Val.getValueType();
assert(ValueVT.isVector() && "Not a vector");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- const bool IsABIRegCopy = CallConv.hasValue();
+ const bool IsABIRegCopy = CallConv.has_value();
if (NumParts == 1) {
EVT PartEVT = PartVT;
@@ -733,7 +726,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
DestEltCnt = ElementCount::getFixed(NumIntermediates);
EVT BuiltVectorTy = EVT::getVectorVT(
- *DAG.getContext(), IntermediateVT.getScalarType(), DestEltCnt.getValue());
+ *DAG.getContext(), IntermediateVT.getScalarType(), *DestEltCnt);
if (ValueVT == BuiltVectorTy) {
// Nothing to do.
@@ -926,10 +919,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
CallConv.getValue(), RegVTs[Value])
: RegVTs[Value];
- // We need to zero extend constants that are liveout to match assumptions
- // in FunctionLoweringInfo::ComputePHILiveOutRegInfo.
- if (ExtendKind == ISD::ANY_EXTEND &&
- (TLI.isZExtFree(Val, RegisterVT) || isa<ConstantSDNode>(Val)))
+ if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
ExtendKind = ISD::ZERO_EXTEND;
getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part],
@@ -1239,7 +1229,8 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
// in the first place we should not be more successful here). Unless we
// have some test case that proves this to be correct, we should avoid
// calling EmitFuncArgumentDbgValue here.
- if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) {
+ if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl,
+ FuncArgumentDbgValueKind::Value, Val)) {
LLVM_DEBUG(dbgs() << "Resolve dangling debug info [order="
<< DbgSDNodeOrder << "] for:\n " << *DI << "\n");
LLVM_DEBUG(dbgs() << " By mapping to:\n "; Val.dump());
@@ -1370,7 +1361,9 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
N = UnusedArgNodeMap[V];
if (N.getNode()) {
// Only emit func arg dbg value for non-variadic dbg.values for now.
- if (!IsVariadic && EmitFuncArgumentDbgValue(V, Var, Expr, dl, false, N))
+ if (!IsVariadic &&
+ EmitFuncArgumentDbgValue(V, Var, Expr, dl,
+ FuncArgumentDbgValueKind::Value, N))
return true;
if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
// Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can
@@ -1642,7 +1635,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
Ops.push_back(getValue(CV->getOperand(i)));
return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
- } else if (isa<ConstantAggregateZero>(C)) {
+ }
+
+ if (isa<ConstantAggregateZero>(C)) {
EVT EltVT =
TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType());
@@ -1654,12 +1649,12 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
if (isa<ScalableVectorType>(VecTy))
return NodeMap[V] = DAG.getSplatVector(VT, getCurSDLoc(), Op);
- else {
- SmallVector<SDValue, 16> Ops;
- Ops.assign(cast<FixedVectorType>(VecTy)->getNumElements(), Op);
- return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
- }
+
+ SmallVector<SDValue, 16> Ops;
+ Ops.assign(cast<FixedVectorType>(VecTy)->getNumElements(), Op);
+ return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
}
+
llvm_unreachable("Unknown vector constant");
}
@@ -1683,11 +1678,12 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
}
- if (const MetadataAsValue *MD = dyn_cast<MetadataAsValue>(V)) {
+ if (const MetadataAsValue *MD = dyn_cast<MetadataAsValue>(V))
return DAG.getMDNode(cast<MDNode>(MD->getMetadata()));
- }
+
if (const auto *BB = dyn_cast<BasicBlock>(V))
return DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
+
llvm_unreachable("Can't get register for value!");
}
@@ -2751,10 +2747,10 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
SDValue Chain =
TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
None, CallOptions, getCurSDLoc()).second;
- // On PS4, the "return address" must still be within the calling function,
- // even if it's at the very end, so emit an explicit TRAP here.
+ // On PS4/PS5, the "return address" must still be within the calling
+ // function, even if it's at the very end, so emit an explicit TRAP here.
// Passing 'true' for doesNotReturn above won't generate the trap for us.
- if (TM.getTargetTriple().isPS4CPU())
+ if (TM.getTargetTriple().isPS())
Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain);
// WebAssembly needs an unreachable instruction after a non-returning call,
// because the function return type can be different from __stack_chk_fail's
@@ -3153,26 +3149,12 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy(
Op1.getValueType(), DAG.getDataLayout());
- // Coerce the shift amount to the right type if we can.
+ // Coerce the shift amount to the right type if we can. This exposes the
+ // truncate or zext to optimization early.
if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
- unsigned ShiftSize = ShiftTy.getSizeInBits();
- unsigned Op2Size = Op2.getValueSizeInBits();
- SDLoc DL = getCurSDLoc();
-
- // If the operand is smaller than the shift count type, promote it.
- if (ShiftSize > Op2Size)
- Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);
-
- // If the operand is larger than the shift count type but the shift
- // count type has enough bits to represent any shift value, truncate
- // it now. This is a common case and it exposes the truncate to
- // optimization early.
- else if (ShiftSize >= Log2_32_Ceil(Op1.getValueSizeInBits()))
- Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
- // Otherwise we'll need to temporarily settle for some other convenient
- // type. Type legalization will make adjustments once the shiftee is split.
- else
- Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
+ assert(ShiftTy.getSizeInBits() >= Log2_32_Ceil(Op1.getValueSizeInBits()) &&
+ "Unexpected shift type");
+ Op2 = DAG.getZExtOrTrunc(Op2, getCurSDLoc(), ShiftTy);
}
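// Sketch of the invariant asserted above (the widths are hypothetical): a
// shift amount type with at least Log2_32_Ceil(bitwidth) bits can
// represent every in-range shift of the shiftee, so one zext-or-trunc of
// the amount is always lossless for defined shifts.
#include <cassert>
#include <cstdint>
static uint64_t sketchShiftI64(uint64_t V, uint64_t Amt) {
  assert(Amt < 64 && "out-of-range shifts are UB anyway");
  uint8_t Small = (uint8_t)Amt;               // 6 bits would suffice
  return V << Small;                          // same result as V << Amt
}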
bool nuw = false;
@@ -3819,13 +3801,8 @@ void SelectionDAGBuilder::visitInsertValue(const User &I) {
DAG.getVTList(AggValueVTs), Values));
}
-void SelectionDAGBuilder::visitExtractValue(const User &I) {
- ArrayRef<unsigned> Indices;
- if (const ExtractValueInst *EV = dyn_cast<ExtractValueInst>(&I))
- Indices = EV->getIndices();
- else
- Indices = cast<ConstantExpr>(&I)->getIndices();
-
+void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
+ ArrayRef<unsigned> Indices = I.getIndices();
const Value *Op0 = I.getOperand(0);
Type *AggTy = Op0->getType();
Type *ValTy = I.getType();
@@ -4379,7 +4356,8 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
// In all other cases the function returns 'false'.
static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index,
ISD::MemIndexType &IndexType, SDValue &Scale,
- SelectionDAGBuilder *SDB, const BasicBlock *CurBB) {
+ SelectionDAGBuilder *SDB, const BasicBlock *CurBB,
+ uint64_t ElemSize) {
SelectionDAG& DAG = SDB->DAG;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const DataLayout &DL = DAG.getDataLayout();
@@ -4419,9 +4397,16 @@ static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index,
Base = SDB->getValue(BasePtr);
Index = SDB->getValue(IndexVal);
IndexType = ISD::SIGNED_SCALED;
- Scale = DAG.getTargetConstant(
- DL.getTypeAllocSize(GEP->getResultElementType()),
- SDB->getCurSDLoc(), TLI.getPointerTy(DL));
+
+ // MGATHER/MSCATTER are only required to support scaling by one or by the
+ // element size. Other scales may be produced using target-specific DAG
+ // combines.
+ uint64_t ScaleVal = DL.getTypeAllocSize(GEP->getResultElementType());
+ if (ScaleVal != ElemSize && ScaleVal != 1)
+ return false;
+
+ Scale =
+ DAG.getTargetConstant(ScaleVal, SDB->getCurSDLoc(), TLI.getPointerTy(DL));
return true;
}
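// Sketch of the new restriction above (the sizes are hypothetical): for a
// gather of 4-byte elements, a GEP stride of 1 or 4 is encoded as the
// Scale operand, while e.g. a 12-byte struct stride makes getUniformBase
// bail out and leaves the address for target-specific combines.
#include <cstdint>
static bool sketchScaleSupported(uint64_t ScaleVal, uint64_t ElemSize) {
  return ScaleVal == 1 || ScaleVal == ElemSize;
}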
@@ -4435,7 +4420,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
EVT VT = Src0.getValueType();
Align Alignment = cast<ConstantInt>(I.getArgOperand(2))
->getMaybeAlignValue()
- .getValueOr(DAG.getEVTAlign(VT.getScalarType()));
+ .value_or(DAG.getEVTAlign(VT.getScalarType()));
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Base;
@@ -4443,7 +4428,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
ISD::MemIndexType IndexType;
SDValue Scale;
bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this,
- I.getParent());
+ I.getParent(), VT.getScalarStoreSize());
unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
@@ -4454,7 +4439,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
- IndexType = ISD::SIGNED_UNSCALED;
+ IndexType = ISD::SIGNED_SCALED;
Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
}
@@ -4541,7 +4526,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
Align Alignment = cast<ConstantInt>(I.getArgOperand(1))
->getMaybeAlignValue()
- .getValueOr(DAG.getEVTAlign(VT.getScalarType()));
+ .value_or(DAG.getEVTAlign(VT.getScalarType()));
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
@@ -4551,7 +4536,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
ISD::MemIndexType IndexType;
SDValue Scale;
bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this,
- I.getParent());
+ I.getParent(), VT.getScalarStoreSize());
unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(AS), MachineMemOperand::MOLoad,
@@ -4562,7 +4547,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
- IndexType = ISD::SIGNED_UNSCALED;
+ IndexType = ISD::SIGNED_SCALED;
Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
}
@@ -4681,7 +4666,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType());
if (!TLI.supportsUnalignedAtomics() &&
- I.getAlignment() < MemVT.getSizeInBits() / 8)
+ I.getAlign().value() < MemVT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic load");
auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout());
@@ -4733,7 +4718,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
EVT MemVT =
TLI.getMemValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
- if (I.getAlignment() < MemVT.getSizeInBits() / 8)
+ if (I.getAlign().value() < MemVT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic store");
auto Flags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout());
@@ -4784,7 +4769,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
}
}
- // Info is set by getTgtMemInstrinsic
+ // Info is set by getTgtMemIntrinsic
TargetLowering::IntrinsicInfo Info;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
@@ -4898,7 +4883,8 @@ static SDValue GetExponent(SelectionDAG &DAG, SDValue Op,
DAG.getConstant(0x7f800000, dl, MVT::i32));
SDValue t1 = DAG.getNode(
ISD::SRL, dl, MVT::i32, t0,
- DAG.getConstant(23, dl, TLI.getPointerTy(DAG.getDataLayout())));
+ DAG.getConstant(23, dl,
+ TLI.getShiftAmountTy(MVT::i32, DAG.getDataLayout())));
SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
DAG.getConstant(127, dl, MVT::i32));
return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
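// Sketch of the computation in GetExponent above on raw IEEE f32 bits:
// mask out the 8-bit exponent field, shift it down by 23 (the mantissa
// width, hence the shift-amount constant), and subtract the bias of 127.
// E.g. 8.0f = 0x41000000 gives ((0x41000000 & 0x7f800000) >> 23) - 127 = 3.
#include <cstdint>
static int sketchGetExponent(uint32_t FloatBits) {
  uint32_t T0 = FloatBits & 0x7f800000;       // exponent field
  uint32_t T1 = T0 >> 23;                     // biased exponent
  return (int)T1 - 127;                       // unbias
}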
@@ -4923,10 +4909,11 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl,
SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
// IntegerPartOfX <<= 23;
- IntegerPartOfX = DAG.getNode(
- ISD::SHL, dl, MVT::i32, IntegerPartOfX,
- DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy(
- DAG.getDataLayout())));
+ IntegerPartOfX =
+ DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+ DAG.getConstant(23, dl,
+ DAG.getTargetLoweringInfo().getShiftAmountTy(
+ MVT::i32, DAG.getDataLayout())));
SDValue TwoToFractionalPartOfX;
if (LimitFloatPrecision <= 6) {
@@ -5354,38 +5341,36 @@ static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
/// ExpandPowI - Expand a llvm.powi intrinsic.
static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
SelectionDAG &DAG) {
- // If RHS is a constant, we can expand this out to a multiplication tree,
- // otherwise we end up lowering to a call to __powidf2 (for example). When
- // optimizing for size, we only want to do this if the expansion would produce
- // a small number of multiplies, otherwise we do the full expansion.
+ // If RHS is a constant, we can expand this out to a multiplication tree if
+ // it's beneficial on the target, otherwise we end up lowering to a call to
+ // __powidf2 (for example).
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
- // Get the exponent as a positive value.
unsigned Val = RHSC->getSExtValue();
- if ((int)Val < 0) Val = -Val;
// powi(x, 0) -> 1.0
if (Val == 0)
return DAG.getConstantFP(1.0, DL, LHS.getValueType());
- bool OptForSize = DAG.shouldOptForSize();
- if (!OptForSize ||
- // If optimizing for size, don't insert too many multiplies.
- // This inserts up to 5 multiplies.
- countPopulation(Val) + Log2_32(Val) < 7) {
+ if (DAG.getTargetLoweringInfo().isBeneficialToExpandPowI(
+ Val, DAG.shouldOptForSize())) {
+ // Get the exponent as a positive value.
+ if ((int)Val < 0)
+ Val = -Val;
// We use the simple binary decomposition method to generate the multiply
// sequence. There are more optimal ways to do this (for example,
// powi(x,15) generates one more multiply than it should), but this has
// the benefit of being both really simple and much better than a libcall.
- SDValue Res; // Logically starts equal to 1.0
+ SDValue Res; // Logically starts equal to 1.0
SDValue CurSquare = LHS;
// TODO: Intrinsics should have fast-math-flags that propagate to these
// nodes.
while (Val) {
if (Val & 1) {
if (Res.getNode())
- Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare);
+ Res =
+ DAG.getNode(ISD::FMUL, DL, Res.getValueType(), Res, CurSquare);
else
- Res = CurSquare; // 1.0*CurSquare.
+ Res = CurSquare; // 1.0*CurSquare.
}
CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
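// A minimal standalone sketch (plain C++, not DAG nodes) of the binary
// decomposition performed above; e.g. powi(x, 13) = x^8 * x^4 * x. The DAG
// form needs only 5 multiplies for 13, since it elides this sketch's initial
// 1.0 * CurSquare product.
static double PowiSketch(double X, unsigned Val) {
  double Res = 1.0;     // Logically starts equal to 1.0.
  double CurSquare = X; // Holds x^(2^i) on iteration i.
  while (Val) {
    if (Val & 1)
      Res *= CurSquare; // Fold in this power-of-two factor.
    CurSquare *= CurSquare;
    Val >>= 1;
  }
  return Res;
}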
@@ -5506,7 +5491,7 @@ getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, TypeSize>> &Regs,
/// appear for function arguments or in the prologue.
bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
const Value *V, DILocalVariable *Variable, DIExpression *Expr,
- DILocation *DL, bool IsDbgDeclare, const SDValue &N) {
+ DILocation *DL, FuncArgumentDbgValueKind Kind, const SDValue &N) {
const Argument *Arg = dyn_cast<Argument>(V);
if (!Arg)
return false;
@@ -5540,7 +5525,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
}
};
- if (!IsDbgDeclare) {
+ if (Kind == FuncArgumentDbgValueKind::Value) {
// ArgDbgValues are hoisted to the beginning of the entry block. So we
// should only emit as ArgDbgValue if the dbg.value intrinsic is found in
// the entry block.
@@ -5627,7 +5612,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
}
if (Reg) {
Op = MachineOperand::CreateReg(Reg, false);
- IsIndirect = IsDbgDeclare;
+ IsIndirect = Kind != FuncArgumentDbgValueKind::Value;
}
}
@@ -5675,7 +5660,8 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
continue;
}
MachineInstr *NewMI =
- MakeVRegDbgValue(RegAndSize.first, *FragmentExpr, IsDbgDeclare);
+ MakeVRegDbgValue(RegAndSize.first, *FragmentExpr,
+ Kind != FuncArgumentDbgValueKind::Value);
FuncInfo.ArgDbgValues.push_back(NewMI);
}
};
@@ -5693,7 +5679,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
}
Op = MachineOperand::CreateReg(VMI->second, false);
- IsIndirect = IsDbgDeclare;
+ IsIndirect = Kind != FuncArgumentDbgValueKind::Value;
} else if (ArgRegsAndSizes.size() > 1) {
// This was split due to the calling convention, and no virtual register
// mapping exists for the value.
@@ -5715,6 +5701,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
NewMI = BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), true, *Op,
Variable, Expr);
+ // Otherwise, use ArgDbgValues.
FuncInfo.ArgDbgValues.push_back(NewMI);
return true;
}
@@ -5820,16 +5807,18 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::vacopy: visitVACopy(I); return;
case Intrinsic::returnaddress:
setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl,
- TLI.getPointerTy(DAG.getDataLayout()),
+ TLI.getValueType(DAG.getDataLayout(), I.getType()),
getValue(I.getArgOperand(0))));
return;
case Intrinsic::addressofreturnaddress:
- setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl,
- TLI.getPointerTy(DAG.getDataLayout())));
+ setValue(&I,
+ DAG.getNode(ISD::ADDROFRETURNADDR, sdl,
+ TLI.getValueType(DAG.getDataLayout(), I.getType())));
return;
case Intrinsic::sponentry:
- setValue(&I, DAG.getNode(ISD::SPONENTRY, sdl,
- TLI.getFrameIndexTy(DAG.getDataLayout())));
+ setValue(&I,
+ DAG.getNode(ISD::SPONENTRY, sdl,
+ TLI.getValueType(DAG.getDataLayout(), I.getType())));
return;
case Intrinsic::frameaddress:
setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl,
@@ -5867,7 +5856,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// @llvm.memcpy defines 0 and 1 to both mean no alignment.
Align DstAlign = MCI.getDestAlign().valueOrOne();
Align SrcAlign = MCI.getSourceAlign().valueOrOne();
- Align Alignment = commonAlignment(DstAlign, SrcAlign);
+ Align Alignment = std::min(DstAlign, SrcAlign);
bool isVol = MCI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memcpy DAG
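// The commonAlignment -> std::min change is behavior-preserving: Align values
// are powers of two, so the largest alignment that both operands guarantee is
// simply the smaller of the two. E.g. (a sketch):
//   commonAlignment(Align(16), Align(4)) == std::min(Align(16), Align(4))
//                                        == Align(4)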
@@ -5890,7 +5879,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// @llvm.memcpy.inline defines 0 and 1 to both mean no alignment.
Align DstAlign = MCI.getDestAlign().valueOrOne();
Align SrcAlign = MCI.getSourceAlign().valueOrOne();
- Align Alignment = commonAlignment(DstAlign, SrcAlign);
+ Align Alignment = std::min(DstAlign, SrcAlign);
bool isVol = MCI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memcpy DAG
@@ -5913,10 +5902,28 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
bool isVol = MSI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue Root = isVol ? getRoot() : getMemoryRoot();
- SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC,
+ SDValue MS = DAG.getMemset(
+ Root, sdl, Op1, Op2, Op3, Alignment, isVol, /* AlwaysInline */ false,
+ isTC, MachinePointerInfo(I.getArgOperand(0)), I.getAAMetadata());
+ updateDAGForMaybeTailCall(MS);
+ return;
+ }
+ case Intrinsic::memset_inline: {
+ const auto &MSII = cast<MemSetInlineInst>(I);
+ SDValue Dst = getValue(I.getArgOperand(0));
+ SDValue Value = getValue(I.getArgOperand(1));
+ SDValue Size = getValue(I.getArgOperand(2));
+ assert(isa<ConstantSDNode>(Size) && "memset_inline needs constant size");
+ // @llvm.memset defines 0 and 1 to both mean no alignment.
+ Align DstAlign = MSII.getDestAlign().valueOrOne();
+ bool isVol = MSII.isVolatile();
+ bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
+ SDValue Root = isVol ? getRoot() : getMemoryRoot();
+ SDValue MC = DAG.getMemset(Root, sdl, Dst, Value, Size, DstAlign, isVol,
+ /* AlwaysInline */ true, isTC,
MachinePointerInfo(I.getArgOperand(0)),
I.getAAMetadata());
- updateDAGForMaybeTailCall(MS);
+ updateDAGForMaybeTailCall(MC);
return;
}
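// For reference, a sketch of the IR this case handles (exact name mangling
// assumed); the length must be a compile-time constant, and lowering may
// never fall back to a libcall:
//   call void @llvm.memset.inline.p0.i64(ptr align 4 %dst, i8 0, i64 32,
//                                        i1 false /*isvolatile*/)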
case Intrinsic::memmove: {
@@ -5927,7 +5934,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// @llvm.memmove defines 0 and 1 to both mean no alignment.
Align DstAlign = MMI.getDestAlign().valueOrOne();
Align SrcAlign = MMI.getSourceAlign().valueOrOne();
- Align Alignment = commonAlignment(DstAlign, SrcAlign);
+ Align Alignment = std::min(DstAlign, SrcAlign);
bool isVol = MMI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memmove DAG
@@ -5946,15 +5953,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDValue Src = getValue(MI.getRawSource());
SDValue Length = getValue(MI.getLength());
- unsigned DstAlign = MI.getDestAlignment();
- unsigned SrcAlign = MI.getSourceAlignment();
Type *LengthTy = MI.getLength()->getType();
unsigned ElemSz = MI.getElementSizeInBytes();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
- SDValue MC = DAG.getAtomicMemcpy(getRoot(), sdl, Dst, DstAlign, Src,
- SrcAlign, Length, LengthTy, ElemSz, isTC,
- MachinePointerInfo(MI.getRawDest()),
- MachinePointerInfo(MI.getRawSource()));
+ SDValue MC =
+ DAG.getAtomicMemcpy(getRoot(), sdl, Dst, Src, Length, LengthTy, ElemSz,
+ isTC, MachinePointerInfo(MI.getRawDest()),
+ MachinePointerInfo(MI.getRawSource()));
updateDAGForMaybeTailCall(MC);
return;
}
@@ -5964,15 +5969,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDValue Src = getValue(MI.getRawSource());
SDValue Length = getValue(MI.getLength());
- unsigned DstAlign = MI.getDestAlignment();
- unsigned SrcAlign = MI.getSourceAlignment();
Type *LengthTy = MI.getLength()->getType();
unsigned ElemSz = MI.getElementSizeInBytes();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
- SDValue MC = DAG.getAtomicMemmove(getRoot(), sdl, Dst, DstAlign, Src,
- SrcAlign, Length, LengthTy, ElemSz, isTC,
- MachinePointerInfo(MI.getRawDest()),
- MachinePointerInfo(MI.getRawSource()));
+ SDValue MC =
+ DAG.getAtomicMemmove(getRoot(), sdl, Dst, Src, Length, LengthTy, ElemSz,
+ isTC, MachinePointerInfo(MI.getRawDest()),
+ MachinePointerInfo(MI.getRawSource()));
updateDAGForMaybeTailCall(MC);
return;
}
@@ -5982,13 +5985,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDValue Val = getValue(MI.getValue());
SDValue Length = getValue(MI.getLength());
- unsigned DstAlign = MI.getDestAlignment();
Type *LengthTy = MI.getLength()->getType();
unsigned ElemSz = MI.getElementSizeInBytes();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
- SDValue MC = DAG.getAtomicMemset(getRoot(), sdl, Dst, DstAlign, Val, Length,
- LengthTy, ElemSz, isTC,
- MachinePointerInfo(MI.getRawDest()));
+ SDValue MC =
+ DAG.getAtomicMemset(getRoot(), sdl, Dst, Val, Length, LengthTy, ElemSz,
+ isTC, MachinePointerInfo(MI.getRawDest()));
updateDAGForMaybeTailCall(MC);
return;
}
@@ -6088,7 +6090,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
} else if (isa<Argument>(Address)) {
// Address is an argument, so try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
- EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, N);
+ EmitFuncArgumentDbgValue(Address, Variable, Expression, dl,
+ FuncArgumentDbgValueKind::Declare, N);
return;
} else {
SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
@@ -6098,8 +6101,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
} else {
// If Address is an argument then try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
- if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true,
- N)) {
+ if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl,
+ FuncArgumentDbgValueKind::Declare, N)) {
LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI
<< " (could not emit func-arg dbg_value)\n");
}
@@ -6165,8 +6168,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
return;
case Intrinsic::eh_sjlj_callsite: {
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
- ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0));
- assert(CI && "Non-constant call site value in eh.sjlj.callsite!");
+ ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(0));
assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
MMI.setCurrentCallSite(CI->getZExtValue());
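// Background for the simplification above (a sketch of the casting idioms):
//   ConstantInt *CI = cast<ConstantInt>(V);     // asserts isa<ConstantInt>(V)
//   ConstantInt *CI = dyn_cast<ConstantInt>(V); // nullptr on type mismatch
// Since cast<> already asserts, the explicit "Non-constant call site value"
// assert was redundant.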
@@ -6346,6 +6348,29 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
#include "llvm/IR/VPIntrinsics.def"
visitVectorPredicationIntrinsic(cast<VPIntrinsic>(I));
return;
+ case Intrinsic::fptrunc_round: {
+ // Get the last argument, the metadata, and convert it to an integer in
+ // the call.
+ Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(1))->getMetadata();
+ Optional<RoundingMode> RoundMode =
+ convertStrToRoundingMode(cast<MDString>(MD)->getString());
+
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+
+ // Propagate fast-math-flags from IR to node(s).
+ SDNodeFlags Flags;
+ Flags.copyFMF(*cast<FPMathOperator>(&I));
+ SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
+
+ SDValue Result;
+ Result = DAG.getNode(
+ ISD::FPTRUNC_ROUND, sdl, VT, getValue(I.getArgOperand(0)),
+ DAG.getTargetConstant((int)*RoundMode, sdl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ setValue(&I, Result);
+
+ return;
+ }
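// For reference, a sketch of the intrinsic being lowered (mangling assumed);
// the metadata string is parsed by convertStrToRoundingMode above:
//   %r = call half @llvm.fptrunc.round.f16.f32(float %x,
//                                              metadata !"round.towardzero")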
case Intrinsic::fmuladd: {
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
@@ -6400,6 +6425,31 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
setValue(&I, Res);
DAG.setRoot(Res.getValue(0));
return;
+ case Intrinsic::is_fpclass: {
+ const DataLayout DLayout = DAG.getDataLayout();
+ EVT DestVT = TLI.getValueType(DLayout, I.getType());
+ EVT ArgVT = TLI.getValueType(DLayout, I.getArgOperand(0)->getType());
+ unsigned Test = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
+ MachineFunction &MF = DAG.getMachineFunction();
+ const Function &F = MF.getFunction();
+ SDValue Op = getValue(I.getArgOperand(0));
+ SDNodeFlags Flags;
+ Flags.setNoFPExcept(
+ !F.getAttributes().hasFnAttr(llvm::Attribute::StrictFP));
+ // If ISD::IS_FPCLASS should be expanded, do it right now, because the
+ // expansion can use illegal types. Making expansion early allows
+ // legalizing these types prior to selection.
+ if (!TLI.isOperationLegalOrCustom(ISD::IS_FPCLASS, ArgVT)) {
+ SDValue Result = TLI.expandIS_FPCLASS(DestVT, Op, Test, Flags, sdl, DAG);
+ setValue(&I, Result);
+ return;
+ }
+
+ SDValue Check = DAG.getTargetConstant(Test, sdl, MVT::i32);
+ SDValue V = DAG.getNode(ISD::IS_FPCLASS, sdl, DestVT, {Op, Check}, Flags);
+ setValue(&I, V);
+ return;
+ }
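// The Test operand is an immediate bitmask of floating-point classes. E.g.
// (a sketch, using the LangRef bit assignment where bit 0 is signaling NaN
// and bit 1 is quiet NaN):
//   %isnan = call i1 @llvm.is.fpclass.f32(float %x, i32 3) ; any NaN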
case Intrinsic::pcmarker: {
SDValue Tmp = getValue(I.getArgOperand(0));
DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp));
@@ -6846,7 +6896,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
}
case Intrinsic::invariant_start:
// Discard region information.
- setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout())));
+ setValue(&I,
+ DAG.getUNDEF(TLI.getValueType(DAG.getDataLayout(), I.getType())));
return;
case Intrinsic::invariant_end:
// Discard region information.
@@ -7150,7 +7201,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
setValue(&I, SetCC);
return;
}
- case Intrinsic::experimental_vector_insert: {
+ case Intrinsic::vector_insert: {
SDValue Vec = getValue(I.getOperand(0));
SDValue SubVec = getValue(I.getOperand(1));
SDValue Index = getValue(I.getOperand(2));
@@ -7167,7 +7218,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
Index));
return;
}
- case Intrinsic::experimental_vector_extract: {
+ case Intrinsic::vector_extract: {
SDValue Vec = getValue(I.getOperand(0));
SDValue Index = getValue(I.getOperand(1));
EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
@@ -7245,7 +7296,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
};
SDVTList VTs = DAG.getVTList(ValueVTs);
- fp::ExceptionBehavior EB = FPI.getExceptionBehavior().getValue();
+ fp::ExceptionBehavior EB = *FPI.getExceptionBehavior();
SDNodeFlags Flags;
if (EB == fp::ExceptionBehavior::ebIgnore)
@@ -7310,13 +7361,14 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
Optional<unsigned> ResOPC;
switch (VPIntrin.getIntrinsicID()) {
-#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
-#define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) ResOPC = ISD::VPSD;
-#define END_REGISTER_VP_INTRINSIC(VPID) break;
+#define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD) \
+ case Intrinsic::VPID: \
+ ResOPC = ISD::VPSD; \
+ break;
#include "llvm/IR/VPIntrinsics.def"
}
- if (!ResOPC.hasValue())
+ if (!ResOPC)
llvm_unreachable(
"Inconsistency: no SDNode available for this VPIntrinsic!");
@@ -7327,7 +7379,7 @@ static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
: ISD::VP_REDUCE_FMUL;
}
- return ResOPC.getValue();
+ return *ResOPC;
}
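// Illustrative expansion of the mapping above: a VPIntrinsics.def entry such
// as HELPER_MAP_VPID_TO_VPSD(vp_add, VP_ADD) becomes
//   case Intrinsic::vp_add:
//     ResOPC = ISD::VP_ADD;
//     break;
// replacing the previous three-macro BEGIN/END protocol with one row per
// intrinsic.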
void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT,
@@ -7365,11 +7417,12 @@ void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT,
SDValue Base, Index, Scale;
ISD::MemIndexType IndexType;
bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
- this, VPIntrin.getParent());
+ this, VPIntrin.getParent(),
+ VT.getScalarStoreSize());
if (!UniformBase) {
Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(PtrOperand);
- IndexType = ISD::SIGNED_UNSCALED;
+ IndexType = ISD::SIGNED_SCALED;
Scale =
DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));
}
@@ -7421,11 +7474,12 @@ void SelectionDAGBuilder::visitVPStoreScatter(const VPIntrinsic &VPIntrin,
SDValue Base, Index, Scale;
ISD::MemIndexType IndexType;
bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
- this, VPIntrin.getParent());
+ this, VPIntrin.getParent(),
+ VT.getScalarStoreSize());
if (!UniformBase) {
Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(PtrOperand);
- IndexType = ISD::SIGNED_UNSCALED;
+ IndexType = ISD::SIGNED_SCALED;
Scale =
DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));
}
@@ -7444,18 +7498,104 @@ void SelectionDAGBuilder::visitVPStoreScatter(const VPIntrinsic &VPIntrin,
setValue(&VPIntrin, ST);
}
+void SelectionDAGBuilder::visitVPStridedLoad(
+ const VPIntrinsic &VPIntrin, EVT VT, SmallVectorImpl<SDValue> &OpValues) {
+ SDLoc DL = getCurSDLoc();
+ Value *PtrOperand = VPIntrin.getArgOperand(0);
+ MaybeAlign Alignment = VPIntrin.getPointerAlignment();
+ if (!Alignment)
+ Alignment = DAG.getEVTAlign(VT.getScalarType());
+ AAMDNodes AAInfo = VPIntrin.getAAMetadata();
+ const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);
+ MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
+ bool AddToChain = !AA || !AA->pointsToConstantMemory(ML);
+ SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
+ MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
+
+ SDValue LD = DAG.getStridedLoadVP(VT, DL, InChain, OpValues[0], OpValues[1],
+ OpValues[2], OpValues[3], MMO,
+ false /*IsExpanding*/);
+
+ if (AddToChain)
+ PendingLoads.push_back(LD.getValue(1));
+ setValue(&VPIntrin, LD);
+}
+
+void SelectionDAGBuilder::visitVPStridedStore(
+ const VPIntrinsic &VPIntrin, SmallVectorImpl<SDValue> &OpValues) {
+ SDLoc DL = getCurSDLoc();
+ Value *PtrOperand = VPIntrin.getArgOperand(1);
+ EVT VT = OpValues[0].getValueType();
+ MaybeAlign Alignment = VPIntrin.getPointerAlignment();
+ if (!Alignment)
+ Alignment = DAG.getEVTAlign(VT.getScalarType());
+ AAMDNodes AAInfo = VPIntrin.getAAMetadata();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
+ MemoryLocation::UnknownSize, *Alignment, AAInfo);
+
+ SDValue ST = DAG.getStridedStoreVP(
+ getMemoryRoot(), DL, OpValues[0], OpValues[1],
+ DAG.getUNDEF(OpValues[1].getValueType()), OpValues[2], OpValues[3],
+ OpValues[4], VT, MMO, ISD::UNINDEXED, /*IsTruncating*/ false,
+ /*IsCompressing*/ false);
+
+ DAG.setRoot(ST);
+ setValue(&VPIntrin, ST);
+}
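// For reference, a sketch of the strided VP intrinsic these two visitors
// lower (pointer mangling assumed); OpValues holds ptr, stride, mask, evl:
//   %v = call <4 x float>
//        @llvm.experimental.vp.strided.load.v4f32.p0.i64(
//            ptr %p, i64 %stride, <4 x i1> %mask, i32 %evl)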
+
+void SelectionDAGBuilder::visitVPCmp(const VPCmpIntrinsic &VPIntrin) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDLoc DL = getCurSDLoc();
+
+ ISD::CondCode Condition;
+ CmpInst::Predicate CondCode = VPIntrin.getPredicate();
+ bool IsFP = VPIntrin.getOperand(0)->getType()->isFPOrFPVectorTy();
+ if (IsFP) {
+ // FIXME: Regular fcmps are FPMathOperators which may have fast-math (nnan)
+ // flags, but calls that don't return floating-point types can't be
+ // FPMathOperators, like vp.fcmp. This affects constrained fcmp too.
+ Condition = getFCmpCondCode(CondCode);
+ if (TM.Options.NoNaNsFPMath)
+ Condition = getFCmpCodeWithoutNaN(Condition);
+ } else {
+ Condition = getICmpCondCode(CondCode);
+ }
+
+ SDValue Op1 = getValue(VPIntrin.getOperand(0));
+ SDValue Op2 = getValue(VPIntrin.getOperand(1));
+ // Operand #2 is the condition-code metadata.
+ SDValue MaskOp = getValue(VPIntrin.getOperand(3));
+ SDValue EVL = getValue(VPIntrin.getOperand(4));
+ MVT EVLParamVT = TLI.getVPExplicitVectorLengthTy();
+ assert(EVLParamVT.isScalarInteger() && EVLParamVT.bitsGE(MVT::i32) &&
+ "Unexpected target EVL type");
+ EVL = DAG.getNode(ISD::ZERO_EXTEND, DL, EVLParamVT, EVL);
+
+ EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ VPIntrin.getType());
+ setValue(&VPIntrin,
+ DAG.getSetCCVP(DL, DestVT, Op1, Op2, Condition, MaskOp, EVL));
+}
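// For reference, a sketch of a VP compare in IR (mangling assumed); operand
// #2 is the condition-code metadata consumed above:
//   %c = call <4 x i1> @llvm.vp.icmp.v4i32(<4 x i32> %a, <4 x i32> %b,
//                                          metadata !"slt",
//                                          <4 x i1> %m, i32 %evl)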
+
void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
const VPIntrinsic &VPIntrin) {
SDLoc DL = getCurSDLoc();
unsigned Opcode = getISDForVPIntrinsic(VPIntrin);
+ auto IID = VPIntrin.getIntrinsicID();
+
+ if (const auto *CmpI = dyn_cast<VPCmpIntrinsic>(&VPIntrin))
+ return visitVPCmp(*CmpI);
+
SmallVector<EVT, 4> ValueVTs;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
ComputeValueVTs(TLI, DAG.getDataLayout(), VPIntrin.getType(), ValueVTs);
SDVTList VTs = DAG.getVTList(ValueVTs);
- auto EVLParamPos =
- VPIntrinsic::getVectorLengthParamPos(VPIntrin.getIntrinsicID());
+ auto EVLParamPos = VPIntrinsic::getVectorLengthParamPos(IID);
MVT EVLParamVT = TLI.getVPExplicitVectorLengthTy();
assert(EVLParamVT.isScalarInteger() && EVLParamVT.bitsGE(MVT::i32) &&
@@ -7472,7 +7612,10 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
switch (Opcode) {
default: {
- SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues);
+ SDNodeFlags SDFlags;
+ if (auto *FPMO = dyn_cast<FPMathOperator>(&VPIntrin))
+ SDFlags.copyFMF(*FPMO);
+ SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues, SDFlags);
setValue(&VPIntrin, Result);
break;
}
@@ -7481,10 +7624,16 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
visitVPLoadGather(VPIntrin, ValueVTs[0], OpValues,
Opcode == ISD::VP_GATHER);
break;
+ case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
+ visitVPStridedLoad(VPIntrin, ValueVTs[0], OpValues);
+ break;
case ISD::VP_STORE:
case ISD::VP_SCATTER:
visitVPStoreScatter(VPIntrin, OpValues, Opcode == ISD::VP_SCATTER);
break;
+ case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
+ visitVPStridedStore(VPIntrin, OpValues);
+ break;
}
}
@@ -7759,7 +7908,7 @@ void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
bool SelectionDAGBuilder::visitMemCmpBCmpCall(const CallInst &I) {
const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
const Value *Size = I.getArgOperand(2);
- const ConstantInt *CSize = dyn_cast<ConstantInt>(Size);
+ const ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(getValue(Size));
if (CSize && CSize->getZExtValue() == 0) {
EVT CallVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType(), true);
@@ -8280,7 +8429,7 @@ public:
// accessed type.
if (isIndirect) {
OpTy = ParamElemType;
- assert(OpTy && "Indirect opernad must have elementtype attribute");
+ assert(OpTy && "Indirect operand must have elementtype attribute");
}
// Look for vector wrapped in a struct. e.g. { <16 x i8> }.
@@ -8401,8 +8550,9 @@ getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
SmallVector<unsigned, 4> Regs;
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
- // No work to do for memory operations.
- if (OpInfo.ConstraintType == TargetLowering::C_Memory)
+ // No work to do for memory/address operands.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
+ OpInfo.ConstraintType == TargetLowering::C_Address)
return None;
// If this is a constraint for a single physreg, or a constraint for a
@@ -8582,7 +8732,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
if (OpInfo.hasArg()) {
OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
- Type *ParamElemTy = Call.getAttributes().getParamElementType(ArgNo);
+ Type *ParamElemTy = Call.getParamElementType(ArgNo);
EVT VT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI,
DAG.getDataLayout(), ParamElemTy);
OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
@@ -8660,8 +8810,9 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
// Compute the constraint code and ConstraintType to use.
TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
- if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
- OpInfo.Type == InlineAsm::isClobber)
+ if ((OpInfo.ConstraintType == TargetLowering::C_Memory &&
+ OpInfo.Type == InlineAsm::isClobber) ||
+ OpInfo.ConstraintType == TargetLowering::C_Address)
continue;
// If this is a memory input, and if the operand is not indirect, do what we
@@ -8711,7 +8862,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
: OpInfo;
const auto RegError =
getRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
- if (RegError.hasValue()) {
+ if (RegError) {
const MachineFunction &MF = DAG.getMachineFunction();
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const char *RegName = TRI.getName(RegError.getValue());
@@ -8736,6 +8887,10 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
}
return false;
};
+ assert((OpInfo.ConstraintType != TargetLowering::C_Address ||
+ (OpInfo.Type == InlineAsm::isInput &&
+ !OpInfo.isMatchingInputConstraint())) &&
+ "Only address as input operand is allowed.");
switch (OpInfo.Type) {
case InlineAsm::isOutput:
@@ -8868,8 +9023,11 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
break;
}
- if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
- assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
+ OpInfo.ConstraintType == TargetLowering::C_Address) {
+ assert((OpInfo.isIndirect ||
+ OpInfo.ConstraintType != TargetLowering::C_Memory) &&
+ "Operand must be indirect to be a mem!");
assert(InOperandVal.getValueType() ==
TLI.getPointerTy(DAG.getDataLayout()) &&
"Memory operands expect pointer values");
@@ -9007,6 +9165,8 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
break;
case TargetLowering::C_Memory:
break; // Already handled.
+ case TargetLowering::C_Address:
+ break; // Silence warning.
case TargetLowering::C_Unknown:
assert(false && "Unexpected unknown constraint");
}
@@ -9953,8 +10113,9 @@ SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("LowerOperation not implemented for this target!");
}
-void
-SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
+void SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V,
+ unsigned Reg,
+ ISD::NodeType ExtendType) {
SDValue Op = getNonRegisterValue(V);
assert((Op.getOpcode() != ISD::CopyFromReg ||
cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
@@ -9969,10 +10130,11 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
None); // This is not an ABI copy.
SDValue Chain = DAG.getEntryNode();
- ISD::NodeType ExtendType = ISD::ANY_EXTEND;
- auto PreferredExtendIt = FuncInfo.PreferredExtendType.find(V);
- if (PreferredExtendIt != FuncInfo.PreferredExtendType.end())
- ExtendType = PreferredExtendIt->second;
+ if (ExtendType == ISD::ANY_EXTEND) {
+ auto PreferredExtendIt = FuncInfo.PreferredExtendType.find(V);
+ if (PreferredExtendIt != FuncInfo.PreferredExtendType.end())
+ ExtendType = PreferredExtendIt->second;
+ }
RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType);
PendingExports.push_back(Chain);
}
@@ -10545,6 +10707,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
/// the end.
void
SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const Instruction *TI = LLVMBB->getTerminator();
SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
@@ -10582,7 +10745,13 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
unsigned &RegOut = ConstantsOut[C];
if (RegOut == 0) {
RegOut = FuncInfo.CreateRegs(C);
- CopyValueToVirtualRegister(C, RegOut);
+ // We need to zero/sign extend ConstantInt phi operands to match
+ // assumptions in FunctionLoweringInfo::ComputePHILiveOutRegInfo.
+ ISD::NodeType ExtendType = ISD::ANY_EXTEND;
+ if (auto *CI = dyn_cast<ConstantInt>(C))
+ ExtendType = TLI.signExtendConstant(CI) ? ISD::SIGN_EXTEND
+ : ISD::ZERO_EXTEND;
+ CopyValueToVirtualRegister(C, RegOut, ExtendType);
}
Reg = RegOut;
} else {
@@ -10602,7 +10771,6 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// Remember that this register needs to added to the machine PHI node as
// the input for this MBB.
SmallVector<EVT, 4> ValueVTs;
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
ComputeValueVTs(TLI, DAG.getDataLayout(), PN.getType(), ValueVTs);
for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
EVT VT = ValueVTs[vti];
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index ea48042a5dcf..72cca3d9b001 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -284,7 +284,8 @@ public:
return CurInst ? CurInst->getDebugLoc() : DebugLoc();
}
- void CopyValueToVirtualRegister(const Value *V, unsigned Reg);
+ void CopyValueToVirtualRegister(const Value *V, unsigned Reg,
+ ISD::NodeType ExtendType = ISD::ANY_EXTEND);
void visit(const Instruction &I);
@@ -527,7 +528,7 @@ private:
void visitInsertElement(const User &I);
void visitShuffleVector(const User &I);
- void visitExtractValue(const User &I);
+ void visitExtractValue(const ExtractValueInst &I);
void visitInsertValue(const User &I);
void visitLandingPad(const LandingPadInst &LP);
@@ -570,6 +571,11 @@ private:
SmallVector<SDValue, 7> &OpValues, bool IsGather);
void visitVPStoreScatter(const VPIntrinsic &VPIntrin,
SmallVector<SDValue, 7> &OpValues, bool IsScatter);
+ void visitVPStridedLoad(const VPIntrinsic &VPIntrin, EVT VT,
+ SmallVectorImpl<SDValue> &OpValues);
+ void visitVPStridedStore(const VPIntrinsic &VPIntrin,
+ SmallVectorImpl<SDValue> &OpValues);
+ void visitVPCmp(const VPCmpIntrinsic &VPIntrin);
void visitVectorPredicationIntrinsic(const VPIntrinsic &VPIntrin);
void visitVAStart(const CallInst &I);
@@ -602,12 +608,22 @@ private:
void emitInlineAsmError(const CallBase &Call, const Twine &Message);
+ /// An enum that tells EmitFuncArgumentDbgValue which kind of intrinsic a
+ /// function-argument dbg value originally came from. This controls the
+ /// internal behavior of EmitFuncArgumentDbgValue.
+ enum class FuncArgumentDbgValueKind {
+ Value, // This was originally a llvm.dbg.value.
+ Addr, // This was originally a llvm.dbg.addr.
+ Declare, // This was originally a llvm.dbg.declare.
+ };
+
/// If V is a function argument then create the corresponding DBG_VALUE machine
/// instruction for it now. At the end of instruction selection, they will be
/// inserted to the entry BB.
bool EmitFuncArgumentDbgValue(const Value *V, DILocalVariable *Variable,
DIExpression *Expr, DILocation *DL,
- bool IsDbgDeclare, const SDValue &N);
+ FuncArgumentDbgValueKind Kind,
+ const SDValue &N);
/// Return the next block after MBB, or nullptr if there is none.
MachineBasicBlock *NextBlock(MachineBasicBlock *MBB);
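// How the kinds map back to their originating intrinsics (a sketch):
//   llvm.dbg.value   -> FuncArgumentDbgValueKind::Value   (direct value)
//   llvm.dbg.addr    -> FuncArgumentDbgValueKind::Addr    (indirect)
//   llvm.dbg.declare -> FuncArgumentDbgValueKind::Declare (indirect)
// Only the Value kind produces a direct DBG_VALUE; the other two set
// IsIndirect at the use sites.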
@@ -673,9 +689,7 @@ struct RegsForValue {
const DataLayout &DL, unsigned Reg, Type *Ty,
Optional<CallingConv::ID> CC);
- bool isABIMangled() const {
- return CallConv.hasValue();
- }
+ bool isABIMangled() const { return CallConv.has_value(); }
/// Add the specified values to this one.
void append(const RegsForValue &RHS) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 77e9e53668f9..bbfc6e5ef64f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -10,9 +10,9 @@
//
//===----------------------------------------------------------------------===//
+#include "SDNodeDbgValue.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/None.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/ISDOpcodes.h"
@@ -45,7 +45,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "SDNodeDbgValue.h"
#include <cstdint>
#include <iterator>
@@ -231,6 +230,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::MUL: return "mul";
case ISD::MULHU: return "mulhu";
case ISD::MULHS: return "mulhs";
+ case ISD::AVGFLOORU: return "avgflooru";
+ case ISD::AVGFLOORS: return "avgfloors";
+ case ISD::AVGCEILU: return "avgceilu";
+ case ISD::AVGCEILS: return "avgceils";
case ISD::ABDS: return "abds";
case ISD::ABDU: return "abdu";
case ISD::SDIV: return "sdiv";
@@ -267,6 +270,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FCOPYSIGN: return "fcopysign";
case ISD::FGETSIGN: return "fgetsign";
case ISD::FCANONICALIZE: return "fcanonicalize";
+ case ISD::IS_FPCLASS: return "is_fpclass";
case ISD::FPOW: return "fpow";
case ISD::STRICT_FPOW: return "strict_fpow";
case ISD::SMIN: return "smin";
@@ -361,6 +365,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::STRICT_FP16_TO_FP: return "strict_fp16_to_fp";
case ISD::FP_TO_FP16: return "fp_to_fp16";
case ISD::STRICT_FP_TO_FP16: return "strict_fp_to_fp16";
+ case ISD::BF16_TO_FP: return "bf16_to_fp";
+ case ISD::FP_TO_BF16: return "fp_to_bf16";
case ISD::LROUND: return "lround";
case ISD::STRICT_LROUND: return "strict_lround";
case ISD::LLROUND: return "llround";
@@ -814,6 +820,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
} else if (const LifetimeSDNode *LN = dyn_cast<LifetimeSDNode>(this)) {
if (LN->hasOffset())
OS << "<" << LN->getOffset() << " to " << LN->getOffset() + LN->getSize() << ">";
+ } else if (const auto *AA = dyn_cast<AssertAlignSDNode>(this)) {
+ OS << '<' << AA->getAlign().value() << '>';
}
if (VerboseDAGDumping) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index b83a60129c78..2b63359c2b1b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -15,11 +15,9 @@
#include "SelectionDAGBuilder.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/None.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
@@ -29,6 +27,7 @@
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -69,7 +68,6 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -82,7 +80,6 @@
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
@@ -370,8 +367,8 @@ static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT,
// PHI.
for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast<PHINode>(I)); ++I)
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i));
- if (!CE || !CE->canTrap()) continue;
+ Constant *C = dyn_cast<Constant>(PN->getIncomingValue(i));
+ if (!C || !C->canTrap()) continue;
// The only case we have to worry about is when the edge is critical.
// Since this block has a PHI Node, we assume it has multiple input
@@ -709,6 +706,7 @@ static void reportFastISelFailure(MachineFunction &MF,
report_fatal_error(Twine(R.getMsg()));
ORE.emit(R);
+ LLVM_DEBUG(dbgs() << R.getMsg() << "\n");
}
void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
@@ -1527,6 +1525,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
BeforeInst->hasOneUse() &&
FastIS->tryToFoldLoad(cast<LoadInst>(BeforeInst), Inst)) {
// If we succeeded, don't re-select the load.
+ LLVM_DEBUG(dbgs()
+ << "FastISel folded load: " << *BeforeInst << "\n");
BI = std::next(BasicBlock::const_iterator(BeforeInst));
--NumFastIselRemaining;
++NumFastIselSuccess;
@@ -3272,6 +3272,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains");
ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+ // If the chained node is not the root, we can't fold it if it has
+ // multiple uses.
// FIXME: What if other value results of the node have uses not matched
// by this pattern?
if (ChainNodesMatched.back() != NodeToMatch &&
@@ -3309,6 +3311,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains");
ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+ // If the chained node is not the root, we can't fold it if it has
+ // multiple uses.
// FIXME: What if other value results of the node have uses not matched
// by this pattern?
if (ChainNodesMatched.back() != NodeToMatch &&
@@ -3447,12 +3451,10 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// such nodes must have a chain, it suffices to check ChainNodesMatched.
// We need to perform this check before potentially modifying one of the
// nodes via MorphNode.
- bool MayRaiseFPException = false;
- for (auto *N : ChainNodesMatched)
- if (mayRaiseFPException(N) && !N->getFlags().hasNoFPExcept()) {
- MayRaiseFPException = true;
- break;
- }
+ bool MayRaiseFPException =
+ llvm::any_of(ChainNodesMatched, [this](SDNode *N) {
+ return mayRaiseFPException(N) && !N->getFlags().hasNoFPExcept();
+ });
// Create the node.
MachineSDNode *Res = nullptr;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index d022e2a23ea0..b66eeb6d2bb1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -13,15 +13,11 @@
#include "ScheduleDAGSDNodes.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/IR/Constants.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
using namespace llvm;
#define DEBUG_TYPE "dag-printer"
@@ -181,11 +177,11 @@ LLVM_DUMP_METHOD void SelectionDAG::dumpDotGraph(const Twine &FileName,
/// clearGraphAttrs - Clear all previously defined node graph attributes.
/// Intended to be used from a debugging tool (eg. gdb).
void SelectionDAG::clearGraphAttrs() {
-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
NodeGraphAttrs.clear();
#else
- errs() << "SelectionDAG::clearGraphAttrs is only available in debug builds"
- << " on systems with Graphviz or gv!\n";
+ errs() << "SelectionDAG::clearGraphAttrs is only available in builds with "
+ << "ABI breaking checks enabled on systems with Graphviz or gv!\n";
#endif
}
@@ -193,11 +189,11 @@ void SelectionDAG::clearGraphAttrs() {
/// setGraphAttrs - Set graph attributes for a node. (eg. "color=red".)
///
void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) {
-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
NodeGraphAttrs[N] = Attrs;
#else
- errs() << "SelectionDAG::setGraphAttrs is only available in debug builds"
- << " on systems with Graphviz or gv!\n";
+ errs() << "SelectionDAG::setGraphAttrs is only available in builds with "
+ << "ABI breaking checks enabled on systems with Graphviz or gv!\n";
#endif
}
@@ -205,7 +201,7 @@ void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) {
/// getGraphAttrs - Get graph attributes for a node. (eg. "color=red".)
/// Used from getNodeAttributes.
std::string SelectionDAG::getGraphAttrs(const SDNode *N) const {
-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
std::map<const SDNode *, std::string>::const_iterator I =
NodeGraphAttrs.find(N);
@@ -214,8 +210,8 @@ std::string SelectionDAG::getGraphAttrs(const SDNode *N) const {
else
return "";
#else
- errs() << "SelectionDAG::getGraphAttrs is only available in debug builds"
- << " on systems with Graphviz or gv!\n";
+ errs() << "SelectionDAG::getGraphAttrs is only available in builds with "
+ << "ABI breaking checks enabled on systems with Graphviz or gv!\n";
return std::string();
#endif
}
@@ -223,11 +219,11 @@ std::string SelectionDAG::getGraphAttrs(const SDNode *N) const {
/// setGraphColor - Convenience for setting node color attribute.
///
void SelectionDAG::setGraphColor(const SDNode *N, const char *Color) {
-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
NodeGraphAttrs[N] = std::string("color=") + Color;
#else
- errs() << "SelectionDAG::setGraphColor is only available in debug builds"
- << " on systems with Graphviz or gv!\n";
+ errs() << "SelectionDAG::setGraphColor is only available in builds with "
+ << "ABI breaking checks enabled on systems with Graphviz or gv!\n";
#endif
}
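// Why the guard changed (a sketch of the build semantics): NodeGraphAttrs is
// itself declared only under LLVM_ENABLE_ABI_BREAKING_CHECKS, which follows
// assertion builds by default but can be forced independently, e.g. with
// -DLLVM_ABI_BREAKING_CHECKS=FORCE_ON, so #ifndef NDEBUG could disagree with
// whether the member actually exists.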
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index dfda7d8b9f81..19a52fde44c1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -17,7 +17,10 @@
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
@@ -27,6 +30,7 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
@@ -168,7 +172,7 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
const auto &RelocationMap =
Builder.FuncInfo.StatepointRelocationMaps[Relocate->getStatepoint()];
- auto It = RelocationMap.find(Relocate->getDerivedPtr());
+ auto It = RelocationMap.find(Relocate);
if (It == RelocationMap.end())
return None;
@@ -192,10 +196,10 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
for (auto &IncomingValue : Phi->incoming_values()) {
Optional<int> SpillSlot =
findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth - 1);
- if (!SpillSlot.hasValue())
+ if (!SpillSlot)
return None;
- if (MergedResult.hasValue() && *MergedResult != *SpillSlot)
+ if (MergedResult && *MergedResult != *SpillSlot)
return None;
MergedResult = SpillSlot;
@@ -276,7 +280,7 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue,
const int LookUpDepth = 6;
Optional<int> Index =
findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth);
- if (!Index.hasValue())
+ if (!Index)
return;
const auto &StatepointSlots = Builder.FuncInfo.StatepointStackSlots;
@@ -526,14 +530,14 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
GCStrategy &S = GFI->getStrategy();
for (const Value *V : SI.Bases) {
auto Opt = S.isGCManagedPointer(V->getType()->getScalarType());
- if (Opt.hasValue()) {
+ if (Opt) {
assert(Opt.getValue() &&
"non gc managed base pointer found in statepoint");
}
}
for (const Value *V : SI.Ptrs) {
auto Opt = S.isGCManagedPointer(V->getType()->getScalarType());
- if (Opt.hasValue()) {
+ if (Opt) {
assert(Opt.getValue() &&
"non gc managed derived pointer found in statepoint");
}
@@ -880,8 +884,9 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops);
DAG.setNodeMemRefs(StatepointMCNode, MemRefs);
- // For values lowered to tied-defs, create the virtual registers. Note that
- // for simplicity, we *always* create a vreg even within a single block.
+ // For values lowered to tied-defs, create the virtual registers only if
+ // they are used in other blocks. For a local gc.relocate, record the
+ // appropriate statepoint result in StatepointLoweringState instead.
DenseMap<SDValue, Register> VirtRegs;
for (const auto *Relocate : SI.GCRelocates) {
Value *Derived = Relocate->getDerivedPtr();
@@ -889,12 +894,23 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
if (!LowerAsVReg.count(SD))
continue;
+ SDValue Relocated = SDValue(StatepointMCNode, LowerAsVReg[SD]);
+
+ // Handle local relocate. Note that different relocates might
+ // map to the same SDValue.
+ if (SI.StatepointInstr->getParent() == Relocate->getParent()) {
+ SDValue Res = StatepointLowering.getLocation(SD);
+ if (Res)
+ assert(Res == Relocated);
+ else
+ StatepointLowering.setLocation(SD, Relocated);
+ continue;
+ }
+
// Handle multiple gc.relocates of the same input efficiently.
if (VirtRegs.count(SD))
continue;
- SDValue Relocated = SDValue(StatepointMCNode, LowerAsVReg[SD]);
-
auto *RetTy = Relocate->getType();
Register Reg = FuncInfo.CreateRegs(RetTy);
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
@@ -915,8 +931,13 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
SDValue SDV = getValue(V);
SDValue Loc = StatepointLowering.getLocation(SDV);
+ bool IsLocal = (Relocate->getParent() == StatepointInstr->getParent());
+
RecordType Record;
- if (LowerAsVReg.count(SDV)) {
+ if (IsLocal && LowerAsVReg.count(SDV)) {
+ // Result is already stored in StatepointLowering
+ Record.type = RecordType::SDValueNode;
+ } else if (LowerAsVReg.count(SDV)) {
Record.type = RecordType::VReg;
assert(VirtRegs.count(SDV));
Record.payload.Reg = VirtRegs[SDV];
@@ -932,7 +953,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
if (Relocate->getParent() != StatepointInstr->getParent())
ExportFromCurrentBlock(V);
}
- RelocationMap[V] = Record;
+ RelocationMap[Relocate] = Record;
}
@@ -1148,8 +1169,8 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl(
unsigned DefaultID = StatepointDirectives::DeoptBundleStatepointID;
auto SD = parseStatepointDirectivesFromAttrs(Call->getAttributes());
- SI.ID = SD.StatepointID.getValueOr(DefaultID);
- SI.NumPatchBytes = SD.NumPatchBytes.getValueOr(0);
+ SI.ID = SD.StatepointID.value_or(DefaultID);
+ SI.NumPatchBytes = SD.NumPatchBytes.value_or(0);
SI.DeoptState =
ArrayRef<const Use>(DeoptBundle.Inputs.begin(), DeoptBundle.Inputs.end());
@@ -1210,11 +1231,19 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
const Value *DerivedPtr = Relocate.getDerivedPtr();
auto &RelocationMap =
FuncInfo.StatepointRelocationMaps[Relocate.getStatepoint()];
- auto SlotIt = RelocationMap.find(DerivedPtr);
+ auto SlotIt = RelocationMap.find(&Relocate);
assert(SlotIt != RelocationMap.end() && "Relocating not lowered gc value");
const RecordType &Record = SlotIt->second;
// If relocation was done via a virtual register...
+ if (Record.type == RecordType::SDValueNode) {
+ assert(Relocate.getStatepoint()->getParent() == Relocate.getParent() &&
+ "Nonlocal gc.relocate mapped via SDValue");
+ SDValue SDV = StatepointLowering.getLocation(getValue(DerivedPtr));
+ assert(SDV.getNode() && "empty SDValue");
+ setValue(&Relocate, SDV);
+ return;
+ }
if (Record.type == RecordType::VReg) {
Register InReg = Record.payload.Reg;
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index f6d1fa87676f..a6b471ea22b7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -13,13 +13,13 @@
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
@@ -30,7 +30,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include <cctype>
using namespace llvm;
@@ -94,6 +93,8 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
// (We look for a CopyFromReg reading a virtual register that is used
// for the function live-in value of register Reg)
SDValue Value = OutVals[I];
+ if (Value->getOpcode() == ISD::AssertZext)
+ Value = Value.getOperand(0);
if (Value->getOpcode() != ISD::CopyFromReg)
return false;
Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
@@ -121,7 +122,7 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
Alignment = Call->getParamStackAlign(ArgIdx);
IndirectType = nullptr;
- assert(IsByVal + IsPreallocated + IsInAlloca <= 1 &&
+ assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
"multiple ABI attributes?");
if (IsByVal) {
IndirectType = Call->getParamByValType(ArgIdx);
@@ -132,6 +133,8 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
IndirectType = Call->getParamPreallocatedType(ArgIdx);
if (IsInAlloca)
IndirectType = Call->getParamInAllocaType(ArgIdx);
+ if (IsSRet)
+ IndirectType = Call->getParamStructRetType(ArgIdx);
}
/// Generate a libcall taking the given operands as arguments and returning a
@@ -193,7 +196,8 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
bool TargetLowering::findOptimalMemOpLowering(
std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
unsigned SrcAS, const AttributeList &FuncAttributes) const {
- if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
+ if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
+ Op.getSrcAlign() < Op.getDstAlign())
return false;
EVT VT = getOptimalMemOpType(Op, FuncAttributes);
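// Context, assuming the SelectionDAG callers: Limit == ~unsigned(0) is what
// the always-inline paths (e.g. llvm.memcpy.inline lowering) pass, and there
// expansion is mandatory, so the alignment-based profitability bail-out above
// must not fire.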
@@ -905,6 +909,132 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
Depth);
}
+// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1),
+// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
+static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ const APInt &DemandedBits,
+ const APInt &DemandedElts,
+ unsigned Depth) {
+ assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
+ "SRL or SRA node is required here!");
+ // Is the right shift using an immediate value of 1?
+ ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
+ if (!N1C || !N1C->isOne())
+ return SDValue();
+
+ // We are looking for an avgfloor
+ // add(ext, ext)
+ // or one of these as an avgceil
+ // add(add(ext, ext), 1)
+ // add(add(ext, 1), ext)
+ // add(ext, add(ext, 1))
+ SDValue Add = Op.getOperand(0);
+ if (Add.getOpcode() != ISD::ADD)
+ return SDValue();
+
+ SDValue ExtOpA = Add.getOperand(0);
+ SDValue ExtOpB = Add.getOperand(1);
+ auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3) {
+ ConstantSDNode *ConstOp;
+ if ((ConstOp = isConstOrConstSplat(Op1, DemandedElts)) &&
+ ConstOp->isOne()) {
+ ExtOpA = Op2;
+ ExtOpB = Op3;
+ return true;
+ }
+ if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
+ ConstOp->isOne()) {
+ ExtOpA = Op1;
+ ExtOpB = Op3;
+ return true;
+ }
+ if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
+ ConstOp->isOne()) {
+ ExtOpA = Op1;
+ ExtOpB = Op2;
+ return true;
+ }
+ return false;
+ };
+ bool IsCeil =
+ (ExtOpA.getOpcode() == ISD::ADD &&
+ MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB)) ||
+ (ExtOpB.getOpcode() == ISD::ADD &&
+ MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA));
+
+ // If the shift is signed (sra):
+ // - Needs >= 2 sign bits for both operands.
+ // - Needs >= 2 zero bits.
+ // If the shift is unsigned (srl):
+ // - Needs >= 1 zero bit for both operands.
+ // - Needs 1 demanded bit zero and >= 2 sign bits.
+ unsigned ShiftOpc = Op.getOpcode();
+ bool IsSigned = false;
+ unsigned KnownBits;
+ unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
+ unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
+ unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
+ unsigned NumZeroA =
+ DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
+ unsigned NumZeroB =
+ DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
+ unsigned NumZero = std::min(NumZeroA, NumZeroB);
+
+ switch (ShiftOpc) {
+ default:
+ llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
+ case ISD::SRA: {
+ if (NumZero >= 2 && NumSigned < NumZero) {
+ IsSigned = false;
+ KnownBits = NumZero;
+ break;
+ }
+ if (NumSigned >= 1) {
+ IsSigned = true;
+ KnownBits = NumSigned;
+ break;
+ }
+ return SDValue();
+ }
+ case ISD::SRL: {
+ if (NumZero >= 1 && NumSigned < NumZero) {
+ IsSigned = false;
+ KnownBits = NumZero;
+ break;
+ }
+ if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
+ IsSigned = true;
+ KnownBits = NumSigned;
+ break;
+ }
+ return SDValue();
+ }
+ }
+
+ unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
+ : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
+
+ // Find the smallest power-2 type that is legal for this vector size and
+ // operation, given the original type size and the number of known sign/zero
+ // bits.
+ EVT VT = Op.getValueType();
+ unsigned MinWidth =
+ std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), PowerOf2Ceil(MinWidth));
+ if (VT.isVector())
+ NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
+ if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT))
+ return SDValue();
+
+ SDLoc DL(Op);
+ SDValue ResultAVG =
+ DAG.getNode(AVGOpc, DL, NVT, DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpA),
+ DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpB));
+ return DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT,
+ ResultAVG);
+}
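// A worked instance of the fold (a sketch; i8 data widened to i32):
//   %a32 = zext i8 %a to i32           ; >= 24 known leading zero bits
//   %b32 = zext i8 %b to i32
//   %s   = add i32 %a32, %b32          ; sum fits in 9 bits
//   %r   = lshr i32 %s, 1              ; the srl-by-1 matched above
// becomes, when ISD::AVGFLOORU is legal at the narrow type,
//   %t = avgflooru i8 (trunc %a32), (trunc %b32)
//   %r = zext i8 %t to i32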
+
/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
@@ -989,7 +1119,7 @@ bool TargetLowering::SimplifyDemandedBits(
KnownBits SrcKnown;
SDValue Src = Op.getOperand(0);
unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
- APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth);
+ APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
return true;
@@ -1105,7 +1235,7 @@ bool TargetLowering::SimplifyDemandedBits(
break;
uint64_t Idx = Op.getConstantOperandVal(1);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
- APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+ APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
Depth + 1))
@@ -1409,6 +1539,19 @@ bool TargetLowering::SimplifyDemandedBits(
// Only known if known in both the LHS and RHS.
Known = KnownBits::commonBits(Known, Known2);
break;
+ case ISD::VSELECT:
+ if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
+ Known, TLO, Depth + 1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
+ Known2, TLO, Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+
+ // Only known if known in both the LHS and RHS.
+ Known = KnownBits::commonBits(Known, Known2);
+ break;
case ISD::SELECT_CC:
if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
Depth + 1))
@@ -1542,6 +1685,16 @@ bool TargetLowering::SimplifyDemandedBits(
// low bits known zero.
Known.Zero.setLowBits(ShAmt);
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0) {
+ SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
// Try shrinking the operation as long as the shift amount will still be
// in range.
if ((ShAmt < DemandedBits.getActiveBits()) &&
@@ -1567,6 +1720,11 @@ bool TargetLowering::SimplifyDemandedBits(
SDValue Op1 = Op.getOperand(1);
EVT ShiftVT = Op1.getValueType();
+ // Try to match AVG patterns.
+ if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
+ DemandedElts, Depth + 1))
+ return TLO.CombineTo(Op, AVG);
+
if (const APInt *SA =
TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
unsigned ShAmt = SA->getZExtValue();
@@ -1633,6 +1791,11 @@ bool TargetLowering::SimplifyDemandedBits(
if (DemandedBits.isOne())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
+ // Try to match AVG patterns.
+ if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
+ DemandedElts, Depth + 1))
+ return TLO.CombineTo(Op, AVG);
+
if (const APInt *SA =
TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
unsigned ShAmt = SA->getZExtValue();
@@ -1727,6 +1890,22 @@ bool TargetLowering::SimplifyDemandedBits(
Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
Known.One |= Known2.One;
Known.Zero |= Known2.Zero;
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
+ !DemandedElts.isAllOnes()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
+ DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
+ DemandedOp1, Op2);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
}
// For pow-2 bitwidths we only demand the bottom modulo amt bits.
@@ -1899,7 +2078,8 @@ bool TargetLowering::SimplifyDemandedBits(
// bit is demanded.
InputDemandedBits.setBit(ExVTBits - 1);
- if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1))
+ if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
+ Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
@@ -1965,7 +2145,7 @@ bool TargetLowering::SimplifyDemandedBits(
}
APInt InDemandedBits = DemandedBits.trunc(InBits);
- APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
+ APInt InDemandedElts = DemandedElts.zext(InElts);
if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
Depth + 1))
return true;
@@ -2002,7 +2182,7 @@ bool TargetLowering::SimplifyDemandedBits(
}
APInt InDemandedBits = DemandedBits.trunc(InBits);
- APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
+ APInt InDemandedElts = DemandedElts.zext(InElts);
// Since some of the sign extended bits are demanded, we know that the sign
// bit is demanded.
@@ -2046,7 +2226,7 @@ bool TargetLowering::SimplifyDemandedBits(
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
APInt InDemandedBits = DemandedBits.trunc(InBits);
- APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
+ APInt InDemandedElts = DemandedElts.zext(InElts);
if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
Depth + 1))
return true;
@@ -2265,9 +2445,27 @@ bool TargetLowering::SimplifyDemandedBits(
break;
}
case ISD::MUL:
- // 'Quadratic Reciprocity': mul(x,x) -> 0 if we're only demanding bit[1]
- if (DemandedBits == 2 && Op.getOperand(0) == Op.getOperand(1))
- return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
+ if (DemandedBits.isPowerOf2()) {
+ // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
+ // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
+ // odd (has LSB set), then the left-shifted low bit of X is the answer.
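+ // E.g. demanding only bit 2 of (X * 12): 12 = 3 << 2 with 3 odd, so bit 2
+ // of the product equals bit 0 of X, which (X << 2) also places at bit 2.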
+ unsigned CTZ = DemandedBits.countTrailingZeros();
+ ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
+ if (C && C->getAPIntValue().countTrailingZeros() == CTZ) {
+ EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
+ SDValue AmtC = TLO.DAG.getConstant(CTZ, dl, ShiftAmtTy);
+ SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
+ return TLO.CombineTo(Op, Shl);
+ }
+ }
+ // For a squared value "X * X", bit 1 is always zero and bit 0 equals X[0],
+ // because X * X is odd iff X is odd.
+ // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
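+ // E.g. i8 X = 7: X*X = 49 = 0b00110001, so bit 0 is X[0] = 1 and bit 1 is 0.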
+ if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
+ SDValue One = TLO.DAG.getConstant(1, dl, VT);
+ SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
+ return TLO.CombineTo(Op, And1);
+ }
LLVM_FALLTHROUGH;
case ISD::ADD:
case ISD::SUB: {
@@ -2330,6 +2528,49 @@ bool TargetLowering::SimplifyDemandedBits(
return TLO.CombineTo(Op, NewOp);
}
+ // Match a multiply with a disguised negated-power-of-2 and convert it to
+ // an equivalent shift-left amount.
+ // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
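+ // E.g. with all bits demanded and MulC == -4: (X * -4) + Y is rewritten to
+ // Y - (X << 2).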
+ auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
+ if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
+ return 0;
+
+ // Don't touch opaque constants. Also, ignore zero and power-of-2
+ // multiplies. Those will get folded later.
+ ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
+ if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
+ !MulC->getAPIntValue().isPowerOf2()) {
+ APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
+ if (UnmaskedC.isNegatedPowerOf2())
+ return (-UnmaskedC).logBase2();
+ }
+ return 0;
+ };
+
+ auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y, unsigned ShlAmt) {
+ EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
+ SDValue ShlAmtC = TLO.DAG.getConstant(ShlAmt, dl, ShiftAmtTy);
+ SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
+ SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
+ return TLO.CombineTo(Op, Res);
+ };
+
+ if (isOperationLegalOrCustom(ISD::SHL, VT)) {
+ if (Op.getOpcode() == ISD::ADD) {
+ // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
+ if (unsigned ShAmt = getShiftLeftAmt(Op0))
+ return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
+ // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
+ if (unsigned ShAmt = getShiftLeftAmt(Op1))
+ return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
+ }
+ if (Op.getOpcode() == ISD::SUB) {
+ // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
+ if (unsigned ShAmt = getShiftLeftAmt(Op1))
+ return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
+ }
+ }
+
LLVM_FALLTHROUGH;
}
default:
@@ -2347,7 +2588,8 @@ bool TargetLowering::SimplifyDemandedBits(
// If we know the value of all of the demanded bits, return this as a
// constant.
- if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
+ if (!isTargetCanonicalConstantNode(Op) &&
+ DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
// Avoid folding to a constant if any OpaqueConstant is involved.
const SDNode *N = Op.getNode();
for (SDNode *Op :
@@ -2370,13 +2612,12 @@ bool TargetLowering::SimplifyDemandedBits(
bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
const APInt &DemandedElts,
- APInt &KnownUndef,
- APInt &KnownZero,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
+ APInt KnownUndef, KnownZero;
bool Simplified =
SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
if (Simplified) {
@@ -2447,6 +2688,10 @@ bool TargetLowering::SimplifyDemandedVectorElts(
KnownUndef = KnownZero = APInt::getZero(NumElts);
+ const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
+ if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO))
+ return false;
+
// TODO: For now we assume we know nothing about scalable vectors.
if (VT.isScalableVector())
return false;
@@ -2565,6 +2810,21 @@ bool TargetLowering::SimplifyDemandedVectorElts(
if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
TLO, Depth + 1))
return true;
+
+ // The bitcast has split each wide element into a number of narrow
+ // subelements. We have just computed the Known bits for the wide elements.
+ // See if element splitting results in some subelements being zero. Only do
+ // this for demanded elements!
+ for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
+ if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
+ .isAllOnes())
+ continue;
+ for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
+ unsigned Elt = Scale * SrcElt + SubElt;
+ if (DemandedElts[Elt])
+ KnownZero.setBit(Elt);
+ }
+ }
}
// If the src element is zero/undef then all the output elements will be -
@@ -2646,6 +2906,25 @@ bool TargetLowering::SimplifyDemandedVectorElts(
KnownUndef.insertBits(SubUndef, i * NumSubElts);
KnownZero.insertBits(SubZero, i * NumSubElts);
}
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedElts.isAllOnes()) {
+ bool FoundNewSub = false;
+ SmallVector<SDValue, 2> DemandedSubOps;
+ for (unsigned i = 0; i != NumSubVecs; ++i) {
+ SDValue SubOp = Op.getOperand(i);
+ APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
+ SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
+ SubOp, SubElts, TLO.DAG, Depth + 1);
+ DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
+ FoundNewSub = NewSubOp ? true : FoundNewSub;
+ }
+ if (FoundNewSub) {
+ SDValue NewOp =
+ TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
break;
}
case ISD::INSERT_SUBVECTOR: {
@@ -2699,7 +2978,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
break;
uint64_t Idx = Op.getConstantOperandVal(1);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
- APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+ APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
APInt SrcUndef, SrcZero;
if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
@@ -2858,7 +3137,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
APInt SrcUndef, SrcZero;
SDValue Src = Op.getOperand(0);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
- APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
+ APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
Depth + 1))
return true;
@@ -3618,6 +3897,115 @@ static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
return SDValue();
}
+static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
+ ISD::CondCode Cond, const SDLoc &dl,
+ SelectionDAG &DAG) {
+ if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
+ return SDValue();
+
+ auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
+ if (!C1 || !(C1->isZero() || C1->isAllOnes()))
+ return SDValue();
+
+ auto getRotateSource = [](SDValue X) {
+ if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
+ return X.getOperand(0);
+ return SDValue();
+ };
+
+ // Peek through a rotated value compared against 0 or -1:
+ // (rot X, Y) == 0/-1 --> X == 0/-1
+ // (rot X, Y) != 0/-1 --> X != 0/-1
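+ // (A rotate only permutes bits, so the all-zeros and all-ones values are
+ // invariant under any rotate amount.)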
+ if (SDValue R = getRotateSource(N0))
+ return DAG.getSetCC(dl, VT, R, N1, Cond);
+
+ // Peek through an 'or' of a rotated value compared against 0:
+ // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
+ // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
+ //
+ // TODO: Add the 'and' with -1 sibling.
+ // TODO: Recurse through a series of 'or' ops to find the rotate.
+ EVT OpVT = N0.getValueType();
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
+ if (SDValue R = getRotateSource(N0.getOperand(0))) {
+ SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
+ return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
+ }
+ if (SDValue R = getRotateSource(N0.getOperand(1))) {
+ SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
+ return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
+ }
+ }
+
+ return SDValue();
+}
+
+static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
+ ISD::CondCode Cond, const SDLoc &dl,
+ SelectionDAG &DAG) {
+ // If we are testing for all-bits-clear, we might be able to do that with
+ // less shifting since bit-order does not matter.
+ if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
+ return SDValue();
+
+ auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
+ if (!C1 || !C1->isZero())
+ return SDValue();
+
+ if (!N0.hasOneUse() ||
+ (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
+ return SDValue();
+
+ unsigned BitWidth = N0.getScalarValueSizeInBits();
+ auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
+ if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
+ return SDValue();
+
+ // Canonicalize fshr as fshl to reduce pattern-matching.
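+ // E.g. for i8: fshr X, Y, 3 == fshl X, Y, 5, since both compute
+ // (X << 5) | (Y >> 3).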
+ unsigned ShAmt = ShAmtC->getZExtValue();
+ if (N0.getOpcode() == ISD::FSHR)
+ ShAmt = BitWidth - ShAmt;
+
+ // Match an 'or' with a specific operand 'Other' in either commuted variant.
+ SDValue X, Y;
+ auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
+ if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
+ return false;
+ if (Or.getOperand(0) == Other) {
+ X = Or.getOperand(0);
+ Y = Or.getOperand(1);
+ return true;
+ }
+ if (Or.getOperand(1) == Other) {
+ X = Or.getOperand(1);
+ Y = Or.getOperand(0);
+ return true;
+ }
+ return false;
+ };
+
+ EVT OpVT = N0.getValueType();
+ EVT ShAmtVT = N0.getOperand(2).getValueType();
+ SDValue F0 = N0.getOperand(0);
+ SDValue F1 = N0.getOperand(1);
+ if (matchOr(F0, F1)) {
+ // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
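+ // The X terms of the fshl form rot(X, C), which is zero iff X is zero, and
+ // Y contributes exactly (Y << C), so the 'or' below tests the same bits.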
+ SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
+ SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
+ SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
+ return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
+ }
+ if (matchOr(F1, F0)) {
+ // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
+ SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
+ SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
+ return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
+ }
+
+ return SDValue();
+}
+
/// Try to simplify a setcc built with the specified operands and cc. If it is
/// unable to simplify it, return a null SDValue.
SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
@@ -3632,13 +4020,17 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
return Fold;
+ bool N0ConstOrSplat =
+ isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
+ bool N1ConstOrSplat =
+ isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
+
// Ensure that the constant occurs on the RHS and fold constant comparisons.
// TODO: Handle non-splat vector constants. All undef causes trouble.
// FIXME: We can't yet fold constant scalable vector splats, so avoid an
// infinite loop here when we encounter one.
ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
- if (isConstOrConstSplat(N0) &&
- (!OpVT.isScalableVector() || !isConstOrConstSplat(N1)) &&
+ if (N0ConstOrSplat && (!OpVT.isScalableVector() || !N1ConstOrSplat) &&
(DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
@@ -3647,13 +4039,19 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// -- but in reverse order -- then try to commute the operands of this setcc
// to match. A matching pair of setcc (cmp) and sub may be combined into 1
// instruction on some targets.
- if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) &&
+ if (!N0ConstOrSplat && !N1ConstOrSplat &&
(DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
!DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
+ if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
+ return V;
+
+ if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
+ return V;
+
if (auto *N1C = isConstOrConstSplat(N1)) {
const APInt &C1 = N1C->getAPIntValue();
@@ -4399,37 +4797,30 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
// Turn (X+C1) == C2 --> X == C2-C1
- if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
- return DAG.getSetCC(dl, VT, N0.getOperand(0),
- DAG.getConstant(RHSC->getAPIntValue()-
- LHSR->getAPIntValue(),
- dl, N0.getValueType()), Cond);
- }
-
- // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
- if (N0.getOpcode() == ISD::XOR)
- // If we know that all of the inverted bits are zero, don't bother
- // performing the inversion.
- if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
- return
- DAG.getSetCC(dl, VT, N0.getOperand(0),
- DAG.getConstant(LHSR->getAPIntValue() ^
- RHSC->getAPIntValue(),
- dl, N0.getValueType()),
- Cond);
+ if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
+ return DAG.getSetCC(
+ dl, VT, N0.getOperand(0),
+ DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
+ dl, N0.getValueType()),
+ Cond);
+
+ // Turn (X^C1) == C2 --> X == C1^C2
+ if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
+ return DAG.getSetCC(
+ dl, VT, N0.getOperand(0),
+ DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
+ dl, N0.getValueType()),
+ Cond);
}
// Turn (C1-X) == C2 --> X == C1-C2
- if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
- if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
- return
- DAG.getSetCC(dl, VT, N0.getOperand(1),
- DAG.getConstant(SUBC->getAPIntValue() -
- RHSC->getAPIntValue(),
- dl, N0.getValueType()),
- Cond);
- }
- }
+ if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
+ if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
+ return DAG.getSetCC(
+ dl, VT, N0.getOperand(1),
+ DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
+ dl, N0.getValueType()),
+ Cond);
// Could RHSC fold directly into a compare?
if (RHSC->getValueType(0).getSizeInBits() <= 64)
@@ -4582,13 +4973,14 @@ TargetLowering::getConstraintType(StringRef Constraint) const {
case 'o': // offsetable
case 'V': // not offsetable
return C_Memory;
+ case 'p': // Address.
+ return C_Address;
case 'n': // Simple Integer
case 'E': // Floating Point Constant
case 'F': // Floating Point Constant
return C_Immediate;
case 'i': // Simple Integer or Relocatable Constant
case 's': // Relocatable Constant
- case 'p': // Address.
case 'X': // Allow ANY value.
case 'I': // Target registers.
case 'J':
@@ -4826,8 +5218,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
if (OpInfo.CallOperandVal) {
llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
if (OpInfo.isIndirect) {
- OpTy = Call.getAttributes().getParamElementType(ArgNo);
- assert(OpTy && "Indirect opernad must have elementtype attribute");
+ OpTy = Call.getParamElementType(ArgNo);
+ assert(OpTy && "Indirect operand must have elementtype attribute");
}
// Look for vector wrapped in a struct. e.g. { <16 x i8> }.
@@ -4962,6 +5354,7 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
case TargetLowering::C_RegisterClass:
return 2;
case TargetLowering::C_Memory:
+ case TargetLowering::C_Address:
return 3;
}
llvm_unreachable("Invalid constraint type");
@@ -5232,6 +5625,17 @@ SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
return SDValue();
}
+SDValue
+TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const {
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.isIntDivCheap(N->getValueType(0), Attr))
+ return SDValue(N, 0); // Lower SREM as SREM
+ return SDValue();
+}
+
/// Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
@@ -7016,6 +7420,30 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
return true;
}
+SDValue
+TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
+ SelectionDAG &DAG) const {
+ unsigned Opcode = Node->getOpcode();
+ assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
+ Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
+ "Wrong opcode");
+
+ if (Node->getFlags().hasNoNaNs()) {
+ ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
+ SDValue Op1 = Node->getOperand(0);
+ SDValue Op2 = Node->getOperand(1);
+ SDValue SelCC = DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred);
+ // Copy FMF flags, but always set the no-signed-zeros flag
+ // as this is implied by the FMINNUM/FMAXNUM semantics.
+ SDNodeFlags Flags = Node->getFlags();
+ Flags.setNoSignedZeros(true);
+ SelCC->setFlags(Flags);
+ return SelCC;
+ }
+
+ return SDValue();
+}
+
SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
SelectionDAG &DAG) const {
SDLoc dl(Node);
@@ -7058,29 +7486,234 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
}
}
- // If none of the above worked, but there are no NaNs, then expand to
- // a compare/select sequence. This is required for correctness since
- // InstCombine might have canonicalized a fcmp+select sequence to a
- // FMINNUM/FMAXNUM node. If we were to fall through to the default
- // expansion to libcall, we might introduce a link-time dependency
- // on libm into a file that originally did not have one.
- if (Node->getFlags().hasNoNaNs()) {
- ISD::CondCode Pred =
- Node->getOpcode() == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
- SDValue Op1 = Node->getOperand(0);
- SDValue Op2 = Node->getOperand(1);
- SDValue SelCC = DAG.getSelectCC(dl, Op1, Op2, Op1, Op2, Pred);
- // Copy FMF flags, but always set the no-signed-zeros flag
- // as this is implied by the FMINNUM/FMAXNUM semantics.
- SDNodeFlags Flags = Node->getFlags();
- Flags.setNoSignedZeros(true);
- SelCC->setFlags(Flags);
+ if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
return SelCC;
- }
return SDValue();
}
+SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
+ unsigned Test, SDNodeFlags Flags,
+ const SDLoc &DL,
+ SelectionDAG &DAG) const {
+ EVT OperandVT = Op.getValueType();
+ assert(OperandVT.isFloatingPoint());
+
+ // Degenerate cases.
+ if (Test == 0)
+ return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
+ if ((Test & fcAllFlags) == fcAllFlags)
+ return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
+
+ // PPC double-double is a pair of doubles, of which the higher part
+ // determines the value class.
+ if (OperandVT == MVT::ppcf128) {
+ Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
+ DAG.getConstant(1, DL, MVT::i32));
+ OperandVT = MVT::f64;
+ }
+
+ // Some checks can be represented as the inversion of a simpler check, for
+ // example "inf|normal|subnormal|zero" => !"nan".
+ bool IsInverted = false;
+ if (unsigned InvertedCheck = getInvertedFPClassTest(Test)) {
+ IsInverted = true;
+ Test = InvertedCheck;
+ }
+
+ // Floating-point type properties.
+ EVT ScalarFloatVT = OperandVT.getScalarType();
+ const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
+ const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
+ bool IsF80 = (ScalarFloatVT == MVT::f80);
+
+ // Some checks can be implemented using float comparisons, if floating point
+ // exceptions are ignored.
+ if (Flags.hasNoFPExcept() &&
+ isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
+ if (Test == fcZero)
+ return DAG.getSetCC(DL, ResultVT, Op,
+ DAG.getConstantFP(0.0, DL, OperandVT),
+ IsInverted ? ISD::SETUNE : ISD::SETOEQ);
+ if (Test == fcNan)
+ return DAG.getSetCC(DL, ResultVT, Op, Op,
+ IsInverted ? ISD::SETO : ISD::SETUO);
+ }
+
+ // In the general case use integer operations.
+ unsigned BitSize = OperandVT.getScalarSizeInBits();
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
+ if (OperandVT.isVector())
+ IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
+ OperandVT.getVectorElementCount());
+ SDValue OpAsInt = DAG.getBitcast(IntVT, Op);
+
+ // Various masks.
+ APInt SignBit = APInt::getSignMask(BitSize);
+ APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
+ APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
+ const unsigned ExplicitIntBitInF80 = 63;
+ APInt ExpMask = Inf;
+ if (IsF80)
+ ExpMask.clearBit(ExplicitIntBitInF80);
+ APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
+ APInt QNaNBitMask =
+ APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
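+ // E.g. for f32: SignBit = 0x80000000, ValueMask = 0x7fffffff, Inf/ExpMask =
+ // 0x7f800000, AllOneMantissa = 0x007fffff and QNaNBitMask = 0x00400000.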
+ APInt InvertionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());
+
+ SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
+ SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
+ SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
+ SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
+ SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
+ SDValue ResultInvertionMask = DAG.getConstant(InvertionMask, DL, ResultVT);
+
+ SDValue Res;
+ const auto appendResult = [&](SDValue PartialRes) {
+ if (PartialRes) {
+ if (Res)
+ Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
+ else
+ Res = PartialRes;
+ }
+ };
+
+ SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
+ const auto getIntBitIsSet = [&]() -> SDValue {
+ if (!IntBitIsSetV) {
+ APInt IntBitMask(BitSize, 0);
+ IntBitMask.setBit(ExplicitIntBitInF80);
+ SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
+ SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
+ IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
+ }
+ return IntBitIsSetV;
+ };
+
+ // Split the value into sign bit and absolute value.
+ SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
+ SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
+ DAG.getConstant(0, DL, IntVT), ISD::SETLT);
+
+ // Tests that involve more than one class should be processed first.
+ SDValue PartialRes;
+
+ if (IsF80)
+ ; // Detect finite numbers of f80 by checking individual classes because
+ // they have different settings of the explicit integer bit.
+ else if ((Test & fcFinite) == fcFinite) {
+ // finite(V) ==> abs(V) < exp_mask
+ PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
+ Test &= ~fcFinite;
+ } else if ((Test & fcFinite) == fcPosFinite) {
+ // finite(V) && V > 0 ==> V < exp_mask
+ PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
+ Test &= ~fcPosFinite;
+ } else if ((Test & fcFinite) == fcNegFinite) {
+ // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
+ PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
+ PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
+ Test &= ~fcNegFinite;
+ }
+ appendResult(PartialRes);
+
+ // Check for individual classes.
+
+ if (unsigned PartialCheck = Test & fcZero) {
+ if (PartialCheck == fcPosZero)
+ PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
+ else if (PartialCheck == fcZero)
+ PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
+ else // fcNegZero
+ PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
+ appendResult(PartialRes);
+ }
+
+ if (unsigned PartialCheck = Test & fcInf) {
+ if (PartialCheck == fcPosInf)
+ PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
+ else if (PartialCheck == fcInf)
+ PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
+ else { // fcNegInf
+ APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
+ SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
+ PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
+ }
+ appendResult(PartialRes);
+ }
+
+ if (unsigned PartialCheck = Test & fcNan) {
+ APInt InfWithQnanBit = Inf | QNaNBitMask;
+ SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
+ if (PartialCheck == fcNan) {
+ // isnan(V) ==> abs(V) > int(inf)
+ PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
+ if (IsF80) {
+ // Recognize unsupported values as NaNs for compatibility with glibc.
+ // For such values, (exp(V) == 0) == int_bit.
+ SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
+ SDValue ExpIsZero =
+ DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
+ SDValue IsPseudo =
+ DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
+ PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
+ }
+ } else if (PartialCheck == fcQNan) {
+ // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
+ PartialRes =
+ DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
+ } else { // fcSNan
+ // issignaling(V) ==> abs(V) > unsigned(Inf) &&
+ // abs(V) < (unsigned(Inf) | quiet_bit)
+ SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
+ SDValue IsNotQnan =
+ DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
+ PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
+ }
+ appendResult(PartialRes);
+ }
+
+ if (unsigned PartialCheck = Test & fcSubnormal) {
+ // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
+ // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
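+ // E.g. for f32, positive subnormals are 0x00000001..0x007fffff; subtracting
+ // 1 maps them below the mantissa mask, while 0 wraps around and fails.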
+ SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
+ SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
+ SDValue VMinusOneV =
+ DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
+ PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
+ if (PartialCheck == fcNegSubnormal)
+ PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
+ appendResult(PartialRes);
+ }
+
+ if (unsigned PartialCheck = Test & fcNormal) {
+ // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
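+ // E.g. for f32: ExpLSB = 0x00800000 decrements the biased exponent, and
+ // ExpLimit = 0x7f000000 then rejects exponent fields 0 and 255.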
+ APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
+ SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
+ SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
+ APInt ExpLimit = ExpMask - ExpLSB;
+ SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
+ PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
+ if (PartialCheck == fcNegNormal)
+ PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
+ else if (PartialCheck == fcPosNormal) {
+ SDValue PosSignV =
+ DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInvertionMask);
+ PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
+ }
+ if (IsF80)
+ PartialRes =
+ DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
+ appendResult(PartialRes);
+ }
+
+ if (!Res)
+ return DAG.getConstant(IsInverted, DL, ResultVT);
+ if (IsInverted)
+ Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInvertionMask);
+ return Res;
+}
+
// Only expand vector types if we have the appropriate vector bit operations.
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
assert(VT.isVector() && "Expected vector type");
@@ -7116,8 +7749,6 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
SDValue Mask0F =
DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
- SDValue Mask01 =
- DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
// v = v - ((v >> 1) & 0x55555555...)
Op = DAG.getNode(ISD::SUB, dl, VT, Op,
@@ -7137,13 +7768,28 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
DAG.getNode(ISD::SRL, dl, VT, Op,
DAG.getConstant(4, dl, ShVT))),
Mask0F);
- // v = (v * 0x01010101...) >> (Len - 8)
- if (Len > 8)
- Op =
- DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
- DAG.getConstant(Len - 8, dl, ShVT));
- return Op;
+ if (Len <= 8)
+ return Op;
+
+ // Avoid the multiply if we only have 2 bytes to add.
+ // TODO: Only doing this for scalars because vectors weren't as obviously
+ // improved.
+ if (Len == 16 && !VT.isVector()) {
+ // v = (v + (v >> 8)) & 0x00FF;
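+ // E.g. if the per-byte counts are 0x0203 (2 and 3 bits set), then
+ // (0x0203 + 0x0002) & 0xFF = 5, the popcount of the full 16 bits.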
+ return DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::ADD, dl, VT, Op,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(8, dl, ShVT))),
+ DAG.getConstant(0xFF, dl, VT));
+ }
+
+ // v = (v * 0x01010101...) >> (Len - 8)
+ SDValue Mask01 =
+ DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
+ return DAG.getNode(ISD::SRL, dl, VT,
+ DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
+ DAG.getConstant(Len - 8, dl, ShVT));
}
SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
@@ -7265,6 +7911,7 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::UMIN, VT)) {
SDValue Zero = DAG.getConstant(0, dl, VT);
+ Op = DAG.getFreeze(Op);
return DAG.getNode(ISD::UMIN, dl, VT, Op,
DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
}
@@ -7272,6 +7919,7 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
// 0 - abs(x) -> smin(x, sub(0,x))
if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::SMIN, VT)) {
+ Op = DAG.getFreeze(Op);
SDValue Zero = DAG.getConstant(0, dl, VT);
return DAG.getNode(ISD::SMIN, dl, VT, Op,
DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
@@ -7285,16 +7933,17 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
!isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
return SDValue();
+ Op = DAG.getFreeze(Op);
SDValue Shift =
DAG.getNode(ISD::SRA, dl, VT, Op,
DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
- if (!IsNegative) {
- SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
- return DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
- }
+ SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
+
+ // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
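+ // E.g. i8 X = -5 (0xFB): Y = 0xFF, X ^ Y = 0x04, and 0x04 - (-1) = 5.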
+ if (!IsNegative)
+ return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
// 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
- SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
}
@@ -8041,23 +8690,6 @@ SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
return SDValue();
}
-// Convert redundant addressing modes (e.g. scaling is redundant
-// when accessing bytes).
-ISD::MemIndexType
-TargetLowering::getCanonicalIndexType(ISD::MemIndexType IndexType, EVT MemVT,
- SDValue Offsets) const {
- bool IsScaledIndex =
- (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::UNSIGNED_SCALED);
- bool IsSignedIndex =
- (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::SIGNED_UNSCALED);
-
- // Scaling is unimportant for bytes, canonicalize to unscaled.
- if (IsScaledIndex && MemVT.getScalarType() == MVT::i8)
- return IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED;
-
- return IndexType;
-}
-
SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
SDValue Op0 = Node->getOperand(0);
SDValue Op1 = Node->getOperand(1);
@@ -8473,8 +9105,20 @@ void TargetLowering::expandUADDSUBO(
EVT ResultType = Node->getValueType(1);
EVT SetCCType = getSetCCResultType(
DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
- ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
- SDValue SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
+ SDValue SetCC;
+ if (IsAdd && isOneConstant(RHS)) {
+ // Special case: uaddo X, 1 overflowed if X+1 is 0. This potentially reduces
+ // the live range of X. We assume comparing with 0 is cheap.
+ // The general case (X + C) < C is not necessarily beneficial. Although we
+ // reduce the live range of X, we may introduce the materialization of
+ // constant C.
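+ // E.g. for i8, uaddo X, 1 overflows only for X == 255, exactly when the
+ // result wraps to 0.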
+ SetCC =
+ DAG.getSetCC(dl, SetCCType, Result,
+ DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
+ } else {
+ ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
+ SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
+ }
Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
}
@@ -8773,11 +9417,11 @@ SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
// floating-point values.
APInt MinInt, MaxInt;
if (IsSigned) {
- MinInt = APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth);
- MaxInt = APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth);
+ MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
+ MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
} else {
- MinInt = APInt::getMinValue(SatWidth).zextOrSelf(DstWidth);
- MaxInt = APInt::getMaxValue(SatWidth).zextOrSelf(DstWidth);
+ MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
+ MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
}
// We cannot risk emitting FP_TO_XINT nodes with a source VT of f16, as
@@ -8931,13 +9575,16 @@ SDValue TargetLowering::expandVectorSplice(SDNode *Node,
bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
SDValue &LHS, SDValue &RHS,
- SDValue &CC, bool &NeedInvert,
+ SDValue &CC, SDValue Mask,
+ SDValue EVL, bool &NeedInvert,
const SDLoc &dl, SDValue &Chain,
bool IsSignaling) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
MVT OpVT = LHS.getSimpleValueType();
ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
NeedInvert = false;
+ assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
+ bool IsNonVP = !EVL;
switch (TLI.getCondCodeAction(CCCode, OpVT)) {
default:
llvm_unreachable("Unknown condition code action!");
@@ -9044,17 +9691,34 @@ bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
// If we aren't the ordered or unorder operation,
// then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
- SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
- SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
+ if (IsNonVP) {
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
+ SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
+ } else {
+ SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
+ SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
+ }
} else {
// Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
- SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
- SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
+ if (IsNonVP) {
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
+ SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
+ } else {
+ SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
+ SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
+ }
}
if (Chain)
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
SetCC2.getValue(1));
- LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
+ if (IsNonVP)
+ LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
+ else {
+ // Transform the binary opcode to the VP equivalent.
+ assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
+ Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
+ LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
+ }
RHS = SDValue();
CC = SDValue();
return true;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
index 43a54ce33bf0..5f9ade18f15c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -39,7 +39,6 @@
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Utils/EscapeEnumerator.h"
#include <cassert>
-#include <cstddef>
#include <string>
#include <utility>
#include <vector>
@@ -362,7 +361,7 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) {
// For each instruction that escapes...
EscapeEnumerator EE(F, "gc_cleanup", /*HandleExceptions=*/true,
- DTU.hasValue() ? DTU.getPointer() : nullptr);
+ DTU ? DTU.getPointer() : nullptr);
while (IRBuilder<> *AtExit = EE.Next()) {
// Pop the entry from the shadow stack. Don't reuse CurrentHead from
// AtEntry, since that would make the value live for the entire function.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp
index 8211e3d6a9dd..1fcee02184a9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -413,7 +413,7 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
Val = Builder.CreateCall(StackAddrFn, {}, "sp");
Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true);
- // Call the setup_dispatch instrinsic. It fills in the rest of the jmpbuf.
+ // Call the setup_dispatch intrinsic. It fills in the rest of the jmpbuf.
Builder.CreateCall(BuiltinSetupDispatchFn, {});
// Store a pointer to the function context so that the back-end will know
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp
index 7f9518e4c075..140a91ae342b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp
@@ -389,17 +389,34 @@ LLVM_DUMP_METHOD void SplitEditor::dump() const {
}
#endif
-LiveInterval::SubRange &SplitEditor::getSubRangeForMaskExact(LaneBitmask LM,
- LiveInterval &LI) {
- for (LiveInterval::SubRange &S : LI.subranges())
+/// Find a subrange corresponding to the exact lane mask @p LM in the live
+/// interval @p LI. The interval @p LI is assumed to contain such a subrange.
+/// This function is used to find corresponding subranges between the
+/// original interval and the new intervals.
+template <typename T> auto &getSubrangeImpl(LaneBitmask LM, T &LI) {
+ for (auto &S : LI.subranges())
if (S.LaneMask == LM)
return S;
llvm_unreachable("SubRange for this mask not found");
}
-LiveInterval::SubRange &SplitEditor::getSubRangeForMask(LaneBitmask LM,
- LiveInterval &LI) {
- for (LiveInterval::SubRange &S : LI.subranges())
+LiveInterval::SubRange &getSubRangeForMaskExact(LaneBitmask LM,
+ LiveInterval &LI) {
+ return getSubrangeImpl(LM, LI);
+}
+
+const LiveInterval::SubRange &getSubRangeForMaskExact(LaneBitmask LM,
+ const LiveInterval &LI) {
+ return getSubrangeImpl(LM, LI);
+}
+
+/// Find a subrange corresponding to the lane mask @p LM, or a superset of it,
+/// in the live interval @p LI. The interval @p LI is assumed to contain such
+/// a subrange. This function is used to find corresponding subranges between
+/// the original interval and the new intervals.
+const LiveInterval::SubRange &getSubRangeForMask(LaneBitmask LM,
+ const LiveInterval &LI) {
+ for (const LiveInterval::SubRange &S : LI.subranges())
if ((S.LaneMask & LM) == LM)
return S;
llvm_unreachable("SubRange for this mask not found");
@@ -566,10 +583,8 @@ SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg,
return Def;
}
-VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
- VNInfo *ParentVNI,
- SlotIndex UseIdx,
- MachineBasicBlock &MBB,
+VNInfo *SplitEditor::defFromParent(unsigned RegIdx, const VNInfo *ParentVNI,
+ SlotIndex UseIdx, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) {
SlotIndex Def;
LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx));
@@ -937,7 +952,7 @@ SplitEditor::findShallowDominator(MachineBasicBlock *MBB,
void SplitEditor::computeRedundantBackCopies(
DenseSet<unsigned> &NotToHoistSet, SmallVectorImpl<VNInfo *> &BackCopies) {
LiveInterval *LI = &LIS.getInterval(Edit->get(0));
- LiveInterval *Parent = &Edit->getParent();
+ const LiveInterval *Parent = &Edit->getParent();
SmallVector<SmallPtrSet<VNInfo *, 8>, 8> EqualVNs(Parent->getNumValNums());
SmallPtrSet<VNInfo *, 8> DominatedVNIs;
@@ -952,7 +967,7 @@ void SplitEditor::computeRedundantBackCopies(
// For VNI aggregation of each ParentVNI, collect dominated, i.e.,
// redundant VNIs to BackCopies.
for (unsigned i = 0, e = Parent->getNumValNums(); i != e; ++i) {
- VNInfo *ParentVNI = Parent->getValNumInfo(i);
+ const VNInfo *ParentVNI = Parent->getValNumInfo(i);
if (!NotToHoistSet.count(ParentVNI->id))
continue;
SmallPtrSetIterator<VNInfo *> It1 = EqualVNs[ParentVNI->id].begin();
@@ -990,7 +1005,7 @@ void SplitEditor::computeRedundantBackCopies(
void SplitEditor::hoistCopies() {
// Get the complement interval, always RegIdx 0.
LiveInterval *LI = &LIS.getInterval(Edit->get(0));
- LiveInterval *Parent = &Edit->getParent();
+ const LiveInterval *Parent = &Edit->getParent();
// Track the nearest common dominator for all back-copies for each ParentVNI,
// indexed by ParentVNI->id.
@@ -1067,7 +1082,7 @@ void SplitEditor::hoistCopies() {
if (!Dom.first || Dom.second.isValid())
continue;
// This value needs a hoisted copy inserted at the end of Dom.first.
- VNInfo *ParentVNI = Parent->getValNumInfo(i);
+ const VNInfo *ParentVNI = Parent->getValNumInfo(i);
MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(ParentVNI->def);
// Get a less loopy dominator than Dom.first.
Dom.first = findShallowDominator(Dom.first, DefMBB);
@@ -1237,11 +1252,11 @@ void SplitEditor::extendPHIRange(MachineBasicBlock &B, LiveIntervalCalc &LIC,
SlotIndex LastUse = End.getPrevSlot();
// The predecessor may not have a live-out value. That is OK, like an
// undef PHI operand.
- LiveInterval &PLI = Edit->getParent();
+ const LiveInterval &PLI = Edit->getParent();
// Need the cast because the inputs to ?: would otherwise be deemed
// "incompatible": SubRange vs LiveInterval.
- LiveRange &PSR = !LM.all() ? getSubRangeForMaskExact(LM, PLI)
- : static_cast<LiveRange &>(PLI);
+ const LiveRange &PSR = !LM.all() ? getSubRangeForMaskExact(LM, PLI)
+ : static_cast<const LiveRange &>(PLI);
if (PSR.liveAt(LastUse))
LIC.extend(LR, End, /*PhysReg=*/0, Undefs);
}
@@ -1254,7 +1269,7 @@ void SplitEditor::extendPHIKillRanges() {
// remove it. Otherwise, extend the live interval to reach the end indexes
// of all predecessor blocks.
- LiveInterval &ParentLI = Edit->getParent();
+ const LiveInterval &ParentLI = Edit->getParent();
for (const VNInfo *V : ParentLI.valnos) {
if (V->isUnused() || !V->isPHIDef())
continue;
@@ -1270,7 +1285,7 @@ void SplitEditor::extendPHIKillRanges() {
SmallVector<SlotIndex, 4> Undefs;
LiveIntervalCalc SubLIC;
- for (LiveInterval::SubRange &PS : ParentLI.subranges()) {
+ for (const LiveInterval::SubRange &PS : ParentLI.subranges()) {
for (const VNInfo *V : PS.valnos) {
if (V->isUnused() || !V->isPHIDef())
continue;
@@ -1337,13 +1352,34 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
continue;
// We may want to extend a live range for a partial redef, or for a use
// tied to an early clobber.
- Idx = Idx.getPrevSlot();
- if (!Edit->getParent().liveAt(Idx))
+ if (!Edit->getParent().liveAt(Idx.getPrevSlot()))
continue;
- } else
- Idx = Idx.getRegSlot(true);
+ } else {
+ assert(MO.isUse());
+ bool IsEarlyClobber = false;
+ if (MO.isTied()) {
+ // We want to extend the live range into the `e` slot rather than the `r`
+ // slot if the tied def is an early clobber, because the `e` slot is already
+ // contained in the live range of the early-clobber tied-def operand. For
+ // example:
+ //  0  %0 = ...
+ //  16 early-clobber %0 = Op %0 (tied-def 0), ...
+ //  32 ... = Op %0
+ // Before extension:
+ //  %0 = [0r, 0d) [16e, 32d)
+ // Here we want to extend [0r, 0d) to 16e, not 16r: using 16r would extend
+ // nothing, because 16r is already contained in [16e, 32d).
+ unsigned OpIdx = MI->getOperandNo(&MO);
+ unsigned DefOpIdx = MI->findTiedOperandIdx(OpIdx);
+ const MachineOperand &DefOp = MI->getOperand(DefOpIdx);
+ IsEarlyClobber = DefOp.isEarlyClobber();
+ }
+
+ Idx = Idx.getRegSlot(IsEarlyClobber);
+ }
- SlotIndex Next = Idx.getNextSlot();
+ SlotIndex Next = Idx;
if (LI.hasSubRanges()) {
// We have to delay extending subranges until we have seen all operands
// defining the register. This is because a <def,read-undef> operand
@@ -1510,9 +1546,8 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
// Provide a reverse mapping from original indices to Edit ranges.
if (LRMap) {
- LRMap->clear();
- for (unsigned i = 0, e = Edit->size(); i != e; ++i)
- LRMap->push_back(i);
+ auto Seq = llvm::seq<unsigned>(0, Edit->size());
+ LRMap->assign(Seq.begin(), Seq.end());
}
// Now check if any registers were separated into multiple components.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h
index 902546fe16d8..4400a797d38e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h
@@ -22,19 +22,19 @@
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalCalc.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SlotIndexes.h"
-#include "llvm/MC/LaneBitmask.h"
#include "llvm/Support/Compiler.h"
#include <utility>
namespace llvm {
class AAResults;
+class LiveInterval;
+class LiveRange;
class LiveIntervals;
class LiveRangeEdit;
class MachineBlockFrequencyInfo;
@@ -346,19 +346,6 @@ private:
return LICalc[SpillMode != SM_Partition && RegIdx != 0];
}
- /// Find a subrange corresponding to the exact lane mask @p LM in the live
- /// interval @p LI. The interval @p LI is assumed to contain such a subrange.
- /// This function is used to find corresponding subranges between the
- /// original interval and the new intervals.
- LiveInterval::SubRange &getSubRangeForMaskExact(LaneBitmask LM,
- LiveInterval &LI);
-
- /// Find a subrange corresponding to the lane mask @p LM, or a superset of it,
- /// in the live interval @p LI. The interval @p LI is assumed to contain such
- /// a subrange. This function is used to find corresponding subranges between
- /// the original interval and the new intervals.
- LiveInterval::SubRange &getSubRangeForMask(LaneBitmask LM, LiveInterval &LI);
-
/// Add a segment to the interval LI for the value number VNI. If LI has
/// subranges, corresponding segments will be added to them as well, but
/// with newly created value numbers. If Original is true, dead def will
@@ -390,10 +377,8 @@ private:
/// defFromParent - Define Reg from ParentVNI at UseIdx using either
/// rematerialization or a COPY from parent. Return the new value.
- VNInfo *defFromParent(unsigned RegIdx,
- VNInfo *ParentVNI,
- SlotIndex UseIdx,
- MachineBasicBlock &MBB,
+ VNInfo *defFromParent(unsigned RegIdx, const VNInfo *ParentVNI,
+ SlotIndex UseIdx, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I);
/// removeBackCopies - Remove the copy instructions that defines the values
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp
index 623d5da9831e..11c6bdc69956 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp
@@ -36,14 +36,12 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Use.h"
@@ -1145,6 +1143,9 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
LLVM_DEBUG(dbgs() << "Fixed " << FixedMemOp << " machine memory operands.\n");
LLVM_DEBUG(dbgs() << "Fixed " << FixedDbg << " debug locations.\n");
LLVM_DEBUG(dbgs() << "Fixed " << FixedInstr << " machine instructions.\n");
+ (void) FixedMemOp;
+ (void) FixedDbg;
+ (void) FixedInstr;
}
void StackColoring::removeInvalidSlotRanges() {
@@ -1319,6 +1320,11 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
int FirstSlot = SortedSlots[I];
int SecondSlot = SortedSlots[J];
+
+ // Objects with different stack IDs cannot be merged.
+ if (MFI->getStackID(FirstSlot) != MFI->getStackID(SecondSlot))
+ continue;
+
LiveInterval *First = &*Intervals[FirstSlot];
LiveInterval *Second = &*Intervals[SecondSlot];
auto &FirstS = LiveStarts[FirstSlot];
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
index 3640296adbca..b83c56903133 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -17,9 +17,9 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp
index 36e8f129ea15..6757d6ca4f88 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp
@@ -721,7 +721,7 @@ void StackMaps::serializeToStackMapSection() {
// Create the section.
MCSection *StackMapSection =
OutContext.getObjectFileInfo()->getStackMapSection();
- OS.SwitchSection(StackMapSection);
+ OS.switchSection(StackMapSection);
// Emit a dummy symbol to force section inclusion.
OS.emitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_StackMaps")));
@@ -732,7 +732,7 @@ void StackMaps::serializeToStackMapSection() {
emitFunctionFrameRecords(OS);
emitConstantPoolEntries(OS);
emitCallsiteEntries(OS);
- OS.AddBlankLine();
+ OS.addBlankLine();
// Clean up.
CSInfos.clear();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp
index 6765fd274686..510a8e3e4ba2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp
@@ -28,8 +28,6 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
@@ -169,7 +167,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI,
// If this instruction accesses memory make sure it doesn't access beyond
// the bounds of the allocated object.
Optional<MemoryLocation> MemLoc = MemoryLocation::getOrNone(I);
- if (MemLoc.hasValue() && MemLoc->Size.hasValue() &&
+ if (MemLoc && MemLoc->Size.hasValue() &&
!TypeSize::isKnownGE(AllocSize,
TypeSize::getFixed(MemLoc->Size.getValue())))
return true;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
index 17e6f51d0899..b8c750688914 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -28,7 +28,6 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp
index 20892a79d35f..bf3d2088e196 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp
@@ -14,14 +14,14 @@
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MBFIWrapper.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TailDuplicator.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
index 68a7b80d6146..ba533a491b9c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -19,17 +19,15 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
-#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -370,8 +368,8 @@ void TailDuplicator::processPHI(
return;
// Remove PredBB from the PHI node.
- MI->RemoveOperand(SrcOpIdx + 1);
- MI->RemoveOperand(SrcOpIdx);
+ MI->removeOperand(SrcOpIdx + 1);
+ MI->removeOperand(SrcOpIdx);
if (MI->getNumOperands() == 1)
MI->eraseFromParent();
}
@@ -385,8 +383,9 @@ void TailDuplicator::duplicateInstruction(
// Allow duplication of CFI instructions.
if (MI->isCFIInstruction()) {
BuildMI(*PredBB, PredBB->end(), PredBB->findDebugLoc(PredBB->begin()),
- TII->get(TargetOpcode::CFI_INSTRUCTION)).addCFIIndex(
- MI->getOperand(0).getCFIIndex());
+ TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(MI->getOperand(0).getCFIIndex())
+ .setMIFlags(MI->getFlags());
return;
}
MachineInstr &NewMI = TII->duplicate(*PredBB, PredBB->end(), *MI);
@@ -496,15 +495,15 @@ void TailDuplicator::updateSuccessorsPHIs(
for (unsigned i = MI.getNumOperands() - 2; i != Idx; i -= 2) {
MachineOperand &MO = MI.getOperand(i + 1);
if (MO.getMBB() == FromBB) {
- MI.RemoveOperand(i + 1);
- MI.RemoveOperand(i);
+ MI.removeOperand(i + 1);
+ MI.removeOperand(i);
}
}
} else
Idx = 0;
// If Idx is set, the operands at Idx and Idx+1 must be removed.
- // We reuse the location to avoid expensive RemoveOperand calls.
+ // We reuse the location to avoid expensive removeOperand calls.
DenseMap<Register, AvailableValsTy>::iterator LI =
SSAUpdateVals.find(Reg);
@@ -541,8 +540,8 @@ void TailDuplicator::updateSuccessorsPHIs(
}
}
if (Idx != 0) {
- MI.RemoveOperand(Idx + 1);
- MI.RemoveOperand(Idx);
+ MI.removeOperand(Idx + 1);
+ MI.removeOperand(Idx);
}
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index fbf190a52585..9430e86fe44d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -10,17 +10,17 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Target/TargetMachine.h"
@@ -37,6 +37,11 @@ bool TargetFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const
return false;
}
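+// By default, run the CFI fixup pass whenever the function needs frame moves
+// and the target emits DWARF CFI (Windows CFI has no .cfi_ directives to fix).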
+bool TargetFrameLowering::enableCFIFixup(MachineFunction &MF) const {
+ return MF.needsFrameMoves() &&
+ !MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+}
+
/// Returns the displacement from the frame register to the stack
/// frame of the specified index, along with the frame register used
/// (in output arg FrameReg). This is the default implementation which
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 3f22cc4289f2..2a987ee3eedf 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -12,6 +12,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -31,8 +32,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
-#include <cctype>
using namespace llvm;
@@ -40,8 +39,7 @@ static cl::opt<bool> DisableHazardRecognizer(
"disable-sched-hazard", cl::Hidden, cl::init(false),
cl::desc("Disable hazard detection during preRA scheduling"));
-TargetInstrInfo::~TargetInstrInfo() {
-}
+TargetInstrInfo::~TargetInstrInfo() = default;
const TargetRegisterClass*
TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
@@ -873,11 +871,13 @@ void TargetInstrInfo::reassociateOps(
MachineInstrBuilder MIB1 =
BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR)
.addReg(RegX, getKillRegState(KillX))
- .addReg(RegY, getKillRegState(KillY));
+ .addReg(RegY, getKillRegState(KillY))
+ .setMIFlags(Prev.getFlags());
MachineInstrBuilder MIB2 =
BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC)
.addReg(RegA, getKillRegState(KillA))
- .addReg(NewVR, getKillRegState(true));
+ .addReg(NewVR, getKillRegState(true))
+ .setMIFlags(Root.getFlags());
setSpecialOperandAttr(Root, Prev, *MIB1, *MIB2);
@@ -1399,7 +1399,7 @@ std::string TargetInstrInfo::createMIROperandComment(
return OS.str();
}
-TargetInstrInfo::PipelinerLoopInfo::~PipelinerLoopInfo() {}
+TargetInstrInfo::PipelinerLoopInfo::~PipelinerLoopInfo() = default;
void TargetInstrInfo::mergeOutliningCandidateAttributes(
Function &F, std::vector<outliner::Candidate> &Candidates) const {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
index f69e50eaa0ca..f7f4a4e3db6a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -56,7 +56,6 @@
#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cassert>
-#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iterator>
@@ -202,7 +201,7 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
setLibcallName(RTLIB::SINCOS_PPCF128, "sincosl");
}
- if (TT.isPS4CPU()) {
+ if (TT.isPS()) {
setLibcallName(RTLIB::SINCOS_F32, "sincosf");
setLibcallName(RTLIB::SINCOS_F64, "sincos");
}
@@ -275,6 +274,11 @@ RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {
return FPROUND_F128_F16;
if (OpVT == MVT::ppcf128)
return FPROUND_PPCF128_F16;
+ } else if (RetVT == MVT::bf16) {
+ if (OpVT == MVT::f32)
+ return FPROUND_F32_BF16;
+ if (OpVT == MVT::f64)
+ return FPROUND_F64_BF16;
} else if (RetVT == MVT::f32) {
if (OpVT == MVT::f64)
return FPROUND_F64_F32;
@@ -740,6 +744,30 @@ void TargetLoweringBase::initActions() {
std::fill(std::begin(TargetDAGCombineArray),
std::end(TargetDAGCombineArray), 0);
+ // We're somewhat special-casing MVT::i2 and MVT::i4. Ideally we want to
+ // remove this and have targets individually set these types if not legal.
+ for (ISD::NodeType NT : enum_seq(ISD::DELETED_NODE, ISD::BUILTIN_OP_END,
+ force_iteration_on_noniterable_enum)) {
+ for (MVT VT : {MVT::i2, MVT::i4})
+ OpActions[(unsigned)VT.SimpleTy][NT] = Expand;
+ }
+ for (MVT AVT : MVT::all_valuetypes()) {
+ for (MVT VT : {MVT::i2, MVT::i4, MVT::v128i2, MVT::v64i4}) {
+ setTruncStoreAction(AVT, VT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, AVT, VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, AVT, VT, Expand);
+ }
+ }
+ for (unsigned IM = (unsigned)ISD::PRE_INC;
+ IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
+ for (MVT VT : {MVT::i2, MVT::i4}) {
+ setIndexedLoadAction(IM, VT, Expand);
+ setIndexedStoreAction(IM, VT, Expand);
+ setIndexedMaskedLoadAction(IM, VT, Expand);
+ setIndexedMaskedStoreAction(IM, VT, Expand);
+ }
+ }
+
for (MVT VT : MVT::fp_valuetypes()) {
MVT IntVT = MVT::getIntegerVT(VT.getFixedSizeInBits());
if (IntVT.isValid()) {
@@ -763,85 +791,63 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);
// These operations default to expand.
- setOperationAction(ISD::FGETSIGN, VT, Expand);
- setOperationAction(ISD::CONCAT_VECTORS, VT, Expand);
- setOperationAction(ISD::FMINNUM, VT, Expand);
- setOperationAction(ISD::FMAXNUM, VT, Expand);
- setOperationAction(ISD::FMINNUM_IEEE, VT, Expand);
- setOperationAction(ISD::FMAXNUM_IEEE, VT, Expand);
- setOperationAction(ISD::FMINIMUM, VT, Expand);
- setOperationAction(ISD::FMAXIMUM, VT, Expand);
- setOperationAction(ISD::FMAD, VT, Expand);
- setOperationAction(ISD::SMIN, VT, Expand);
- setOperationAction(ISD::SMAX, VT, Expand);
- setOperationAction(ISD::UMIN, VT, Expand);
- setOperationAction(ISD::UMAX, VT, Expand);
- setOperationAction(ISD::ABS, VT, Expand);
- setOperationAction(ISD::FSHL, VT, Expand);
- setOperationAction(ISD::FSHR, VT, Expand);
- setOperationAction(ISD::SADDSAT, VT, Expand);
- setOperationAction(ISD::UADDSAT, VT, Expand);
- setOperationAction(ISD::SSUBSAT, VT, Expand);
- setOperationAction(ISD::USUBSAT, VT, Expand);
- setOperationAction(ISD::SSHLSAT, VT, Expand);
- setOperationAction(ISD::USHLSAT, VT, Expand);
- setOperationAction(ISD::SMULFIX, VT, Expand);
- setOperationAction(ISD::SMULFIXSAT, VT, Expand);
- setOperationAction(ISD::UMULFIX, VT, Expand);
- setOperationAction(ISD::UMULFIXSAT, VT, Expand);
- setOperationAction(ISD::SDIVFIX, VT, Expand);
- setOperationAction(ISD::SDIVFIXSAT, VT, Expand);
- setOperationAction(ISD::UDIVFIX, VT, Expand);
- setOperationAction(ISD::UDIVFIXSAT, VT, Expand);
- setOperationAction(ISD::FP_TO_SINT_SAT, VT, Expand);
- setOperationAction(ISD::FP_TO_UINT_SAT, VT, Expand);
+ setOperationAction({ISD::FGETSIGN, ISD::CONCAT_VECTORS,
+ ISD::FMINNUM, ISD::FMAXNUM,
+ ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE,
+ ISD::FMINIMUM, ISD::FMAXIMUM,
+ ISD::FMAD, ISD::SMIN,
+ ISD::SMAX, ISD::UMIN,
+ ISD::UMAX, ISD::ABS,
+ ISD::FSHL, ISD::FSHR,
+ ISD::SADDSAT, ISD::UADDSAT,
+ ISD::SSUBSAT, ISD::USUBSAT,
+ ISD::SSHLSAT, ISD::USHLSAT,
+ ISD::SMULFIX, ISD::SMULFIXSAT,
+ ISD::UMULFIX, ISD::UMULFIXSAT,
+ ISD::SDIVFIX, ISD::SDIVFIXSAT,
+ ISD::UDIVFIX, ISD::UDIVFIXSAT,
+ ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT,
+ ISD::IS_FPCLASS},
+ VT, Expand);
// Overflow operations default to expand
- setOperationAction(ISD::SADDO, VT, Expand);
- setOperationAction(ISD::SSUBO, VT, Expand);
- setOperationAction(ISD::UADDO, VT, Expand);
- setOperationAction(ISD::USUBO, VT, Expand);
- setOperationAction(ISD::SMULO, VT, Expand);
- setOperationAction(ISD::UMULO, VT, Expand);
+ setOperationAction({ISD::SADDO, ISD::SSUBO, ISD::UADDO, ISD::USUBO,
+ ISD::SMULO, ISD::UMULO},
+ VT, Expand);
// ADDCARRY operations default to expand
- setOperationAction(ISD::ADDCARRY, VT, Expand);
- setOperationAction(ISD::SUBCARRY, VT, Expand);
- setOperationAction(ISD::SETCCCARRY, VT, Expand);
- setOperationAction(ISD::SADDO_CARRY, VT, Expand);
- setOperationAction(ISD::SSUBO_CARRY, VT, Expand);
+ setOperationAction({ISD::ADDCARRY, ISD::SUBCARRY, ISD::SETCCCARRY,
+ ISD::SADDO_CARRY, ISD::SSUBO_CARRY},
+ VT, Expand);
// ADDC/ADDE/SUBC/SUBE default to expand.
- setOperationAction(ISD::ADDC, VT, Expand);
- setOperationAction(ISD::ADDE, VT, Expand);
- setOperationAction(ISD::SUBC, VT, Expand);
- setOperationAction(ISD::SUBE, VT, Expand);
+ setOperationAction({ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}, VT,
+ Expand);
+
+ // Halving adds
+ setOperationAction(
+ {ISD::AVGFLOORS, ISD::AVGFLOORU, ISD::AVGCEILS, ISD::AVGCEILU}, VT,
+ Expand);
// Absolute difference
- setOperationAction(ISD::ABDS, VT, Expand);
- setOperationAction(ISD::ABDU, VT, Expand);
+ setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Expand);
// These default to Expand so they will be expanded to CTLZ/CTTZ by default.
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction({ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
+ Expand);
- setOperationAction(ISD::BITREVERSE, VT, Expand);
- setOperationAction(ISD::PARITY, VT, Expand);
+ setOperationAction({ISD::BITREVERSE, ISD::PARITY}, VT, Expand);
// These library functions default to expand.
- setOperationAction(ISD::FROUND, VT, Expand);
- setOperationAction(ISD::FROUNDEVEN, VT, Expand);
- setOperationAction(ISD::FPOWI, VT, Expand);
+ setOperationAction({ISD::FROUND, ISD::FROUNDEVEN, ISD::FPOWI}, VT, Expand);
// These operations default to expand for vector types.
- if (VT.isVector()) {
- setOperationAction(ISD::FCOPYSIGN, VT, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
- setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, VT, Expand);
- setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Expand);
- setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand);
- setOperationAction(ISD::SPLAT_VECTOR, VT, Expand);
- }
+ if (VT.isVector())
+ setOperationAction({ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG,
+ ISD::ANY_EXTEND_VECTOR_INREG,
+ ISD::SIGN_EXTEND_VECTOR_INREG,
+ ISD::ZERO_EXTEND_VECTOR_INREG, ISD::SPLAT_VECTOR},
+ VT, Expand);
// Constrained floating-point operations default to expand.
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
@@ -852,21 +858,13 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand);
// Vector reductions default to expand.
- setOperationAction(ISD::VECREDUCE_FADD, VT, Expand);
- setOperationAction(ISD::VECREDUCE_FMUL, VT, Expand);
- setOperationAction(ISD::VECREDUCE_ADD, VT, Expand);
- setOperationAction(ISD::VECREDUCE_MUL, VT, Expand);
- setOperationAction(ISD::VECREDUCE_AND, VT, Expand);
- setOperationAction(ISD::VECREDUCE_OR, VT, Expand);
- setOperationAction(ISD::VECREDUCE_XOR, VT, Expand);
- setOperationAction(ISD::VECREDUCE_SMAX, VT, Expand);
- setOperationAction(ISD::VECREDUCE_SMIN, VT, Expand);
- setOperationAction(ISD::VECREDUCE_UMAX, VT, Expand);
- setOperationAction(ISD::VECREDUCE_UMIN, VT, Expand);
- setOperationAction(ISD::VECREDUCE_FMAX, VT, Expand);
- setOperationAction(ISD::VECREDUCE_FMIN, VT, Expand);
- setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Expand);
- setOperationAction(ISD::VECREDUCE_SEQ_FMUL, VT, Expand);
+ setOperationAction(
+ {ISD::VECREDUCE_FADD, ISD::VECREDUCE_FMUL, ISD::VECREDUCE_ADD,
+ ISD::VECREDUCE_MUL, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
+ ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
+ ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_FMAX,
+ ISD::VECREDUCE_FMIN, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_SEQ_FMUL},
+ VT, Expand);
// Named vector shuffles default to expand.
setOperationAction(ISD::VECTOR_SPLICE, VT, Expand);
@@ -881,30 +879,16 @@ void TargetLoweringBase::initActions() {
// ConstantFP nodes default to expand. Targets can either change this to
// Legal, in which case all fp constants are legal, or use isFPImmLegal()
// to optimize expansions for certain constants.
- setOperationAction(ISD::ConstantFP, MVT::f16, Expand);
- setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
- setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
- setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
- setOperationAction(ISD::ConstantFP, MVT::f128, Expand);
+ setOperationAction(ISD::ConstantFP,
+ {MVT::f16, MVT::f32, MVT::f64, MVT::f80, MVT::f128},
+ Expand);
// These library functions default to expand.
- for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) {
- setOperationAction(ISD::FCBRT, VT, Expand);
- setOperationAction(ISD::FLOG , VT, Expand);
- setOperationAction(ISD::FLOG2, VT, Expand);
- setOperationAction(ISD::FLOG10, VT, Expand);
- setOperationAction(ISD::FEXP , VT, Expand);
- setOperationAction(ISD::FEXP2, VT, Expand);
- setOperationAction(ISD::FFLOOR, VT, Expand);
- setOperationAction(ISD::FNEARBYINT, VT, Expand);
- setOperationAction(ISD::FCEIL, VT, Expand);
- setOperationAction(ISD::FRINT, VT, Expand);
- setOperationAction(ISD::FTRUNC, VT, Expand);
- setOperationAction(ISD::LROUND, VT, Expand);
- setOperationAction(ISD::LLROUND, VT, Expand);
- setOperationAction(ISD::LRINT, VT, Expand);
- setOperationAction(ISD::LLRINT, VT, Expand);
- }
+ setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, ISD::FEXP,
+ ISD::FEXP2, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL,
+ ISD::FRINT, ISD::FTRUNC, ISD::LROUND, ISD::LLROUND,
+ ISD::LRINT, ISD::LLRINT},
+ {MVT::f32, MVT::f64, MVT::f128}, Expand);
// Default ISD::TRAP to expand (which turns it into abort).
setOperationAction(ISD::TRAP, MVT::Other, Expand);
@@ -1394,6 +1378,16 @@ void TargetLoweringBase::computeRegisterProperties(
}
}
+ // Decide how to handle bf16. If the target does not have native bf16 support,
+ // promote it to f32, because there are no bf16 library calls (except for
+ // converting from f32 to bf16).
+ if (!isTypeLegal(MVT::bf16)) {
+ NumRegistersForVT[MVT::bf16] = NumRegistersForVT[MVT::f32];
+ RegisterTypeForVT[MVT::bf16] = RegisterTypeForVT[MVT::f32];
+ TransformToType[MVT::bf16] = MVT::f32;
+ ValueTypeActions.setTypeAction(MVT::bf16, TypePromoteFloat);
+ }
+
// Loop over all of the vector value types to see which need transformations.
for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
@@ -1647,6 +1641,11 @@ bool TargetLoweringBase::isSuitableForJumpTable(const SwitchInst *SI,
(NumCases * 100 >= Range * MinDensity);
}
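+// By default, widen the switch condition to its legal register type; targets
+// may override this hook to pick a different comparison type.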
+MVT TargetLoweringBase::getPreferredSwitchConditionType(LLVMContext &Context,
+ EVT ConditionVT) const {
+ return getRegisterType(Context, ConditionVT);
+}
+
/// Get the EVTs and ArgFlags collections that represent the legalized return
/// type of the given function. This does not require a DAG or a return value,
/// and is suitable for use before any DAGs for the function are constructed.
@@ -2066,9 +2065,11 @@ static std::string getReciprocalOpName(bool IsSqrt, EVT VT) {
Name += IsSqrt ? "sqrt" : "div";
- // TODO: Handle "half" or other float types?
+ // TODO: Handle other float types?
if (VT.getScalarType() == MVT::f64) {
Name += "d";
+ } else if (VT.getScalarType() == MVT::f16) {
+ Name += "h";
} else {
assert(VT.getScalarType() == MVT::f32 &&
"Unexpected FP type for reciprocal estimate");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index ce350034d073..f3d68bd9c92d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -310,7 +310,7 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
auto *S = C.getELFSection(".linker-options", ELF::SHT_LLVM_LINKER_OPTIONS,
ELF::SHF_EXCLUDE);
- Streamer.SwitchSection(S);
+ Streamer.switchSection(S);
for (const auto *Operand : LinkerOptions->operands()) {
if (cast<MDNode>(Operand)->getNumOperands() != 2)
@@ -326,7 +326,7 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
auto *S = C.getELFSection(".deplibs", ELF::SHT_LLVM_DEPENDENT_LIBRARIES,
ELF::SHF_MERGE | ELF::SHF_STRINGS, 1);
- Streamer.SwitchSection(S);
+ Streamer.switchSection(S);
for (const auto *Operand : DependentLibraries->operands()) {
Streamer.emitBytes(
@@ -350,7 +350,7 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
auto *S = C.getObjectFileInfo()->getPseudoProbeDescSection(
TM->getFunctionSections() ? Name->getString() : StringRef());
- Streamer.SwitchSection(S);
+ Streamer.switchSection(S);
Streamer.emitInt64(GUID->getZExtValue());
Streamer.emitInt64(Hash->getZExtValue());
Streamer.emitULEB128IntValue(Name->getString().size());
@@ -365,11 +365,11 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
GetObjCImageInfo(M, Version, Flags, Section);
if (!Section.empty()) {
auto *S = C.getELFSection(Section, ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
- Streamer.SwitchSection(S);
+ Streamer.switchSection(S);
Streamer.emitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO")));
Streamer.emitInt32(Version);
Streamer.emitInt32(Flags);
- Streamer.AddBlankLine();
+ Streamer.addBlankLine();
}
emitCGProfileMetadata(Streamer, M);
@@ -399,7 +399,7 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(
MCSection *Sec = getContext().getELFNamedSection(".data", Label->getName(),
ELF::SHT_PROGBITS, Flags, 0);
unsigned Size = DL.getPointerSize();
- Streamer.SwitchSection(Sec);
+ Streamer.switchSection(Sec);
Streamer.emitValueToAlignment(DL.getPointerABIAlignment(0).value());
Streamer.emitSymbolAttribute(Label, MCSA_ELF_TypeObject);
const MCExpr *E = MCConstantExpr::create(Size, getContext());
@@ -449,6 +449,9 @@ static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) {
Name == ".llvmbc" || Name == ".llvmcmd")
return SectionKind::getMetadata();
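+ // Embedded offloading binaries are only consumed at link time, so they can
+ // be excluded from the loaded image.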
+ if (Name == ".llvm.offloading")
+ return SectionKind::getExclude();
+
if (Name.empty() || Name[0] != '.') return K;
// Default implementation based on some magic section names.
@@ -507,9 +510,12 @@ static unsigned getELFSectionType(StringRef Name, SectionKind K) {
static unsigned getELFSectionFlags(SectionKind K) {
unsigned Flags = 0;
- if (!K.isMetadata())
+ if (!K.isMetadata() && !K.isExclude())
Flags |= ELF::SHF_ALLOC;
+ if (K.isExclude())
+ Flags |= ELF::SHF_EXCLUDE;
+
if (K.isText())
Flags |= ELF::SHF_EXECINSTR;
@@ -681,9 +687,10 @@ calcUniqueIDUpdateFlagsAndSize(const GlobalObject *GO, StringRef SectionName,
}
if (Retain) {
- if ((Ctx.getAsmInfo()->useIntegratedAssembler() ||
- Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) &&
- !TM.getTargetTriple().isOSSolaris())
+ if (TM.getTargetTriple().isOSSolaris())
+ Flags |= ELF::SHF_SUNW_NODISCARD;
+ else if (Ctx.getAsmInfo()->useIntegratedAssembler() ||
+ Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36))
Flags |= ELF::SHF_GNU_RETAIN;
return NextUniqueID++;
}
@@ -860,12 +867,15 @@ static MCSection *selectELFSectionForGlobal(
EmitUniqueSection = true;
Flags |= ELF::SHF_LINK_ORDER;
}
- if (Retain &&
- (Ctx.getAsmInfo()->useIntegratedAssembler() ||
- Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) &&
- !TM.getTargetTriple().isOSSolaris()) {
- EmitUniqueSection = true;
- Flags |= ELF::SHF_GNU_RETAIN;
+ if (Retain) {
+ if (TM.getTargetTriple().isOSSolaris()) {
+ EmitUniqueSection = true;
+ Flags |= ELF::SHF_SUNW_NODISCARD;
+ } else if (Ctx.getAsmInfo()->useIntegratedAssembler() ||
+ Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) {
+ EmitUniqueSection = true;
+ Flags |= ELF::SHF_GNU_RETAIN;
+ }
}
MCSectionELF *Section = selectELFSectionForGlobal(
@@ -1171,6 +1181,15 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
}
+MCSection *TargetLoweringObjectFileMachO::getStaticDtorSection(
+ unsigned Priority, const MCSymbol *KeySym) const {
+ // TODO(yln): Remove -lower-global-dtors-via-cxa-atexit fallback flag
+ // (LowerGlobalDtorsViaCxaAtExit) and always issue a fatal error here.
+ if (TM->Options.LowerGlobalDtorsViaCxaAtExit)
+ report_fatal_error("@llvm.global_dtors should have been lowered already");
+ return StaticDtorSection;
+}
+
void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer,
Module &M) const {
// Emit the linker options if present.
@@ -1207,12 +1226,12 @@ void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer,
// Get the section.
MCSectionMachO *S = getContext().getMachOSection(
Segment, Section, TAA, StubSize, SectionKind::getData());
- Streamer.SwitchSection(S);
+ Streamer.switchSection(S);
Streamer.emitLabel(getContext().
getOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO")));
Streamer.emitInt32(VersionVal);
Streamer.emitInt32(ImageInfoFlags);
- Streamer.AddBlankLine();
+ Streamer.addBlankLine();
}
static void checkMachOComdat(const GlobalValue *GV) {
@@ -1520,6 +1539,9 @@ getCOFFSectionFlags(SectionKind K, const TargetMachine &TM) {
if (K.isMetadata())
Flags |=
COFF::IMAGE_SCN_MEM_DISCARDABLE;
+ else if (K.isExclude())
+ Flags |=
+ COFF::IMAGE_SCN_LNK_REMOVE | COFF::IMAGE_SCN_MEM_DISCARDABLE;
else if (K.isText())
Flags |=
COFF::IMAGE_SCN_MEM_EXECUTE |
@@ -1755,11 +1777,11 @@ void TargetLoweringObjectFileCOFF::emitModuleMetadata(MCStreamer &Streamer,
COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
COFF::IMAGE_SCN_MEM_READ,
SectionKind::getReadOnly());
- Streamer.SwitchSection(S);
+ Streamer.switchSection(S);
Streamer.emitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO")));
Streamer.emitInt32(Version);
Streamer.emitInt32(Flags);
- Streamer.AddBlankLine();
+ Streamer.addBlankLine();
}
emitCGProfileMetadata(Streamer, M);
@@ -1772,7 +1794,7 @@ void TargetLoweringObjectFileCOFF::emitLinkerDirectives(
// spec, this section is a space-separated string containing flags for
// linker.
MCSection *Sec = getDrectveSection();
- Streamer.SwitchSection(Sec);
+ Streamer.switchSection(Sec);
for (const auto *Option : LinkerOptions->operands()) {
for (const auto &Piece : cast<MDNode>(Option)->operands()) {
// Lead with a space for consistency with our dllexport implementation.
@@ -1791,7 +1813,7 @@ void TargetLoweringObjectFileCOFF::emitLinkerDirectives(
getMangler());
OS.flush();
if (!Flags.empty()) {
- Streamer.SwitchSection(getDrectveSection());
+ Streamer.switchSection(getDrectveSection());
Streamer.emitBytes(Flags);
}
Flags.clear();
@@ -1817,7 +1839,7 @@ void TargetLoweringObjectFileCOFF::emitLinkerDirectives(
OS.flush();
if (!Flags.empty()) {
- Streamer.SwitchSection(getDrectveSection());
+ Streamer.switchSection(getDrectveSection());
Streamer.emitBytes(Flags);
}
Flags.clear();
@@ -2170,8 +2192,7 @@ MCSection *TargetLoweringObjectFileWasm::getStaticCtorSection(
MCSection *TargetLoweringObjectFileWasm::getStaticDtorSection(
unsigned Priority, const MCSymbol *KeySym) const {
- llvm_unreachable("@llvm.global_dtors should have been lowered already");
- return nullptr;
+ report_fatal_error("@llvm.global_dtors should have been lowered already");
}
//===----------------------------------------------------------------------===//
@@ -2544,10 +2565,24 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry(
XCOFF::XTY_SD));
}
+MCSection *TargetLoweringObjectFileXCOFF::getSectionForLSDA(
+ const Function &F, const MCSymbol &FnSym, const TargetMachine &TM) const {
+ auto *LSDA = cast<MCSectionXCOFF>(LSDASection);
+ if (TM.getFunctionSections()) {
+ // If option -ffunction-sections is on, append the function name to the
+ // name of the LSDA csect so that each function has its own LSDA csect.
+ // This helps the linker to garbage-collect EH info of unused functions.
+ SmallString<128> NameStr = LSDA->getName();
+ raw_svector_ostream(NameStr) << '.' << F.getName();
+ LSDA = getContext().getXCOFFSection(NameStr, LSDA->getKind(),
+ LSDA->getCsectProp());
+ }
+ return LSDA;
+}
//===----------------------------------------------------------------------===//
// GOFF
//===----------------------------------------------------------------------===//
-TargetLoweringObjectFileGOFF::TargetLoweringObjectFileGOFF() {}
+TargetLoweringObjectFileGOFF::TargetLoweringObjectFileGOFF() = default;
MCSection *TargetLoweringObjectFileGOFF::getExplicitSectionGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
@@ -2558,8 +2593,8 @@ MCSection *TargetLoweringObjectFileGOFF::SelectSectionForGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
auto *Symbol = TM.getSymbol(GO);
if (Kind.isBSS())
- return getContext().getGOFFSection(Symbol->getName(),
- SectionKind::getBSS());
+ return getContext().getGOFFSection(Symbol->getName(), SectionKind::getBSS(),
+ nullptr, nullptr);
return getContext().getObjectFileInfo()->getTextSection();
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp
index 0731cf9b28f4..af5d10103f78 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp
@@ -15,7 +15,6 @@
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
index 05004fb935df..0bd229f4fc68 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -22,6 +22,7 @@
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
+#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
#include "llvm/CodeGen/CSEConfigBase.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
@@ -47,7 +48,6 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
-#include "llvm/Transforms/Utils/SymbolRewriter.h"
#include <cassert>
#include <string>
@@ -115,20 +115,18 @@ static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
cl::desc("Dump garbage collector data"));
static cl::opt<cl::boolOrDefault>
VerifyMachineCode("verify-machineinstrs", cl::Hidden,
- cl::desc("Verify generated machine code"),
- cl::ZeroOrMore);
-static cl::opt<cl::boolOrDefault> DebugifyAndStripAll(
- "debugify-and-strip-all-safe", cl::Hidden,
- cl::desc(
- "Debugify MIR before and Strip debug after "
- "each pass except those known to be unsafe when debug info is present"),
- cl::ZeroOrMore);
+ cl::desc("Verify generated machine code"));
+static cl::opt<cl::boolOrDefault>
+ DebugifyAndStripAll("debugify-and-strip-all-safe", cl::Hidden,
+ cl::desc("Debugify MIR before and Strip debug after "
+ "each pass except those known to be unsafe "
+ "when debug info is present"));
static cl::opt<cl::boolOrDefault> DebugifyCheckAndStripAll(
"debugify-check-and-strip-all-safe", cl::Hidden,
cl::desc(
"Debugify MIR before, by checking and stripping the debug info after, "
- "each pass except those known to be unsafe when debug info is present"),
- cl::ZeroOrMore);
+ "each pass except those known to be unsafe when debug info is "
+ "present"));
// Enable or disable the MachineOutliner.
static cl::opt<RunOutliner> EnableMachineOutliner(
"enable-machine-outliner", cl::desc("Enable the machine outliner"),
@@ -139,6 +137,11 @@ static cl::opt<RunOutliner> EnableMachineOutliner(
"Disable all outlining"),
// Sentinel value for unspecified option.
clEnumValN(RunOutliner::AlwaysOutline, "", "")));
+// Disable the pass that fixes unwind information. Whether the pass is included
+// in the pipeline is controlled via the target options; this option serves as
+// a manual override.
+static cl::opt<bool> DisableCFIFixup("disable-cfi-fixup", cl::Hidden,
+ cl::desc("Disable the CFI fixup pass"));
// Enable or disable FastISel. Both options are needed, because
// FastISel is enabled by default with -fast, and we wish to be
// able to enable or disable fast-isel independently from -O0.
@@ -175,12 +178,12 @@ static cl::opt<bool>
// Disable MIRProfileLoader before RegAlloc. This is for debugging and
// tuning purposes.
static cl::opt<bool> DisableRAFSProfileLoader(
- "disable-ra-fsprofile-loader", cl::init(true), cl::Hidden,
+ "disable-ra-fsprofile-loader", cl::init(false), cl::Hidden,
cl::desc("Disable MIRProfileLoader before RegAlloc"));
// Disable MIRProfileLoader before BlockPlacement. This is for debugging
// and tuning purposes.
static cl::opt<bool> DisableLayoutFSProfileLoader(
- "disable-layout-fsprofile-loader", cl::init(true), cl::Hidden,
+ "disable-layout-fsprofile-loader", cl::init(false), cl::Hidden,
cl::desc("Disable MIRProfileLoader before BlockPlacement"));
// Specify FSProfile file name.
static cl::opt<std::string>
@@ -256,6 +259,11 @@ static cl::opt<bool> DisableExpandReductions(
"disable-expand-reductions", cl::init(false), cl::Hidden,
cl::desc("Disable the expand reduction intrinsics pass from running"));
+/// Disable the select optimization pass.
+static cl::opt<bool> DisableSelectOptimize(
+ "disable-select-optimize", cl::init(true), cl::Hidden,
+ cl::desc("Disable the select-optimization pass from running"));
+
/// Allow standard passes to be disabled by command line options. This supports
/// simple binary flags that either suppress the pass or do nothing.
/// i.e. -disable-mypass=false has no effect.
@@ -490,6 +498,7 @@ CGPassBuilderOption llvm::getCGPassBuilderOption() {
SET_BOOLEAN_OPTION(DisableConstantHoisting)
SET_BOOLEAN_OPTION(DisableCGP)
SET_BOOLEAN_OPTION(DisablePartialLibcallInlining)
+ SET_BOOLEAN_OPTION(DisableSelectOptimize)
SET_BOOLEAN_OPTION(PrintLSR)
SET_BOOLEAN_OPTION(PrintISelInput)
SET_BOOLEAN_OPTION(PrintGCInfo)
@@ -736,21 +745,21 @@ void TargetPassConfig::addPass(Pass *P) {
if (StopBefore == PassID && StopBeforeCount++ == StopBeforeInstanceNum)
Stopped = true;
if (Started && !Stopped) {
- if (AddingMachinePasses)
+ if (AddingMachinePasses) {
+ // Construct banner message before PM->add() as that may delete the pass.
+ std::string Banner =
+ std::string("After ") + std::string(P->getPassName());
addMachinePrePasses();
- std::string Banner;
- // Construct banner message before PM->add() as that may delete the pass.
- if (AddingMachinePasses)
- Banner = std::string("After ") + std::string(P->getPassName());
- PM->add(P);
- if (AddingMachinePasses)
+ PM->add(P);
addMachinePostPasses(Banner);
+ } else {
+ PM->add(P);
+ }
// Add the passes after the pass P if there are any.
- for (const auto &IP : Impl->InsertedPasses) {
+ for (const auto &IP : Impl->InsertedPasses)
if (IP.TargetPassID == PassID)
addPass(IP.getInsertedPass());
- }
} else {
delete P;
}
@@ -895,6 +904,12 @@ void TargetPassConfig::addIRPasses() {
addPass(&ShadowStackGCLoweringID);
addPass(createLowerConstantIntrinsicsPass());
+ // For MachO, lower @llvm.global_dtors into @llvm.global_ctors with
+ // __cxa_atexit() calls to avoid emitting the deprecated __mod_term_func.
+ if (TM->getTargetTriple().isOSBinFormatMachO() &&
+ TM->Options.LowerGlobalDtorsViaCxaAtExit)
+ addPass(createLowerGlobalDtorsLegacyPass());
+
// Make sure that no unreachable blocks are instruction selected.
addPass(createUnreachableBlockEliminationPass());
@@ -922,6 +937,13 @@ void TargetPassConfig::addIRPasses() {
// Allow disabling it for testing purposes.
if (!DisableExpandReductions)
addPass(createExpandReductionsPass());
+
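+ // Hoist repeated address computations of thread-local variables.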
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createTLSVariableHoistPass());
+
+ // Convert conditional moves to conditional jumps when profitable.
+ if (getOptLevel() != CodeGenOpt::None && !DisableSelectOptimize)
+ addPass(createSelectOptimizePass());
}
/// Turn exception handling constructs into something the code generators can
@@ -1261,12 +1283,19 @@ void TargetPassConfig::addMachinePasses() {
// FIXME: In principle, BasicBlockSection::Labels and splitting can be used
// together. Update this check once we have addressed any issues.
if (TM->getBBSectionsType() != llvm::BasicBlockSection::None) {
- addPass(llvm::createBasicBlockSectionsPass(TM->getBBSectionsFuncListBuf()));
+ if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) {
+ addPass(llvm::createBasicBlockSectionsProfileReaderPass(
+ TM->getBBSectionsFuncListBuf()));
+ }
+ addPass(llvm::createBasicBlockSectionsPass());
} else if (TM->Options.EnableMachineFunctionSplitter ||
EnableMachineFunctionSplitter) {
addPass(createMachineFunctionSplitterPass());
}
+ if (!DisableCFIFixup && TM->Options.EnableCFIFixup)
+ addPass(createCFIFixup());
+
// Add passes that directly emit MI after all other MI passes.
addPreEmitPass2();
@@ -1376,6 +1405,11 @@ FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) {
return createTargetRegisterAllocator(Optimized);
}
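+// Report whether -regalloc= selected anything other than the default register
+// allocator for this configuration.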
+bool TargetPassConfig::isCustomizedRegAlloc() {
+ return RegAlloc !=
+ (RegisterRegAlloc::FunctionPassCtor)&useDefaultRegisterAllocator;
+}
+
bool TargetPassConfig::addRegAssignAndRewriteFast() {
if (RegAlloc != (RegisterRegAlloc::FunctionPassCtor)&useDefaultRegisterAllocator &&
RegAlloc != (RegisterRegAlloc::FunctionPassCtor)&createFastRegisterAllocator)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp
index 6bcf79547056..ac346585b0f8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -16,10 +16,11 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetSchedule.cpp
index ce59452fd1b8..ac07c86cab85 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetSchedule.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetSchedule.cpp
@@ -16,7 +16,6 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrItineraries.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
index e4520d8ccb1e..ba2c8dda7de5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
@@ -45,10 +45,6 @@ bool TargetSubtargetInfo::enableRALocalReassignment(
return true;
}
-bool TargetSubtargetInfo::enableAdvancedRASplitCost() const {
- return false;
-}
-
bool TargetSubtargetInfo::enablePostRAScheduler() const {
return getSchedModel().PostRAScheduler;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index dfd962be2882..c44fd9f97383 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -28,7 +28,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
@@ -50,7 +49,6 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
@@ -163,6 +161,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&);
void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist);
void eliminateRegSequence(MachineBasicBlock::iterator&);
+ bool processStatepoint(MachineInstr *MI, TiedOperandMap &TiedOperands);
public:
static char ID; // Pass identification, replacement for typeid
@@ -1629,6 +1628,61 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
}
}
+// For every tied operand pair this function transforms statepoint from
+// RegA = STATEPOINT ... RegB(tied-def N)
+// to
+// RegB = STATEPOINT ... RegB(tied-def N)
+// and replaces all uses of RegA with RegB.
+// No extra COPY instruction is necessary because tied use is killed at
+// STATEPOINT.
+bool TwoAddressInstructionPass::processStatepoint(
+ MachineInstr *MI, TiedOperandMap &TiedOperands) {
+
+ bool NeedCopy = false;
+ for (auto &TO : TiedOperands) {
+ Register RegB = TO.first;
+ if (TO.second.size() != 1) {
+ NeedCopy = true;
+ continue;
+ }
+
+ unsigned SrcIdx = TO.second[0].first;
+ unsigned DstIdx = TO.second[0].second;
+
+ MachineOperand &DstMO = MI->getOperand(DstIdx);
+ Register RegA = DstMO.getReg();
+
+ assert(RegB == MI->getOperand(SrcIdx).getReg());
+
+ if (RegA == RegB)
+ continue;
+
+ MRI->replaceRegWith(RegA, RegB);
+
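+ // Keep the analyses consistent: merge RegA's live range into RegB's and
+ // transfer RegA's liveness information to RegB.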
+ if (LIS) {
+ VNInfo::Allocator &A = LIS->getVNInfoAllocator();
+ LiveInterval &LI = LIS->getInterval(RegB);
+ for (auto &S : LIS->getInterval(RegA)) {
+ VNInfo *VNI = LI.getNextValue(S.start, A);
+ LiveRange::Segment NewSeg(S.start, S.end, VNI);
+ LI.addSegment(NewSeg);
+ }
+ LIS->removeInterval(RegA);
+ }
+
+ if (LV) {
+ if (MI->getOperand(SrcIdx).isKill())
+ LV->removeVirtualRegisterKilled(RegB, *MI);
+ LiveVariables::VarInfo &SrcInfo = LV->getVarInfo(RegB);
+ LiveVariables::VarInfo &DstInfo = LV->getVarInfo(RegA);
+ SrcInfo.AliveBlocks |= DstInfo.AliveBlocks;
+ for (auto *KillMI : DstInfo.Kills)
+ LV->addVirtualRegisterKilled(RegB, *KillMI, false);
+ }
+ }
+ return !NeedCopy;
+}
+
/// Reduce two-address instructions to two operands.
bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
MF = &Func;
@@ -1722,6 +1776,14 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
}
}
+ if (mi->getOpcode() == TargetOpcode::STATEPOINT &&
+ processStatepoint(&*mi, TiedOperands)) {
+ TiedOperands.clear();
+ LLVM_DEBUG(dbgs() << "\t\trewrite to:\t" << *mi);
+ mi = nmi;
+ continue;
+ }
+
// Now iterate over the information collected above.
for (auto &TO : TiedOperands) {
processTiedPairs(&*mi, TO.second, Dist);
@@ -1733,11 +1795,11 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
// From %reg = INSERT_SUBREG %reg, %subreg, subidx
// To %reg:subidx = COPY %subreg
unsigned SubIdx = mi->getOperand(3).getImm();
- mi->RemoveOperand(3);
+ mi->removeOperand(3);
assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx");
mi->getOperand(0).setSubReg(SubIdx);
mi->getOperand(0).setIsUndef(mi->getOperand(1).isUndef());
- mi->RemoveOperand(1);
+ mi->removeOperand(1);
mi->setDesc(TII->get(TargetOpcode::COPY));
LLVM_DEBUG(dbgs() << "\t\tconvert to:\t" << *mi);
@@ -1858,7 +1920,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
LLVM_DEBUG(dbgs() << "Turned: " << MI << " into an IMPLICIT_DEF");
MI.setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
for (int j = MI.getNumOperands() - 1, ee = 0; j > ee; --j)
- MI.RemoveOperand(j);
+ MI.removeOperand(j);
} else {
if (LIS)
LIS->RemoveMachineInstrFromMaps(MI);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp
index 01ea171e5ea2..166a3c413f6a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp
@@ -24,15 +24,13 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
-#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
@@ -44,9 +42,9 @@
using namespace llvm;
-static cl::opt<bool>
-DisablePromotion("disable-type-promotion", cl::Hidden, cl::init(false),
- cl::desc("Disable type promotion pass"));
+static cl::opt<bool> DisablePromotion("disable-type-promotion", cl::Hidden,
+ cl::init(false),
+ cl::desc("Disable type promotion pass"));
// The goal of this pass is to enable more efficient code generation for
// operations on narrow types (i.e. types with < 32 bits) and this is a
@@ -103,17 +101,16 @@ DisablePromotion("disable-type-promotion", cl::Hidden, cl::init(false),
namespace {
class IRPromoter {
LLVMContext &Ctx;
- IntegerType *OrigTy = nullptr;
unsigned PromotedWidth = 0;
- SetVector<Value*> &Visited;
- SetVector<Value*> &Sources;
- SetVector<Instruction*> &Sinks;
+ SetVector<Value *> &Visited;
+ SetVector<Value *> &Sources;
+ SetVector<Instruction *> &Sinks;
SmallPtrSetImpl<Instruction *> &SafeWrap;
IntegerType *ExtTy = nullptr;
- SmallPtrSet<Value*, 8> NewInsts;
- SmallPtrSet<Instruction*, 4> InstsToRemove;
- DenseMap<Value*, SmallVector<Type*, 4>> TruncTysMap;
- SmallPtrSet<Value*, 8> Promoted;
+ SmallPtrSet<Value *, 8> NewInsts;
+ SmallPtrSet<Instruction *, 4> InstsToRemove;
+ DenseMap<Value *, SmallVector<Type *, 4>> TruncTysMap;
+ SmallPtrSet<Value *, 8> Promoted;
void ReplaceAllUsersOfWith(Value *From, Value *To);
void ExtendSources();
@@ -123,16 +120,13 @@ class IRPromoter {
void Cleanup();
public:
- IRPromoter(LLVMContext &C, IntegerType *Ty, unsigned Width,
+ IRPromoter(LLVMContext &C, unsigned Width,
SetVector<Value *> &visited, SetVector<Value *> &sources,
SetVector<Instruction *> &sinks,
SmallPtrSetImpl<Instruction *> &wrap)
- : Ctx(C), OrigTy(Ty), PromotedWidth(Width), Visited(visited),
+ : Ctx(C), PromotedWidth(Width), Visited(visited),
Sources(sources), Sinks(sinks), SafeWrap(wrap) {
ExtTy = IntegerType::get(Ctx, PromotedWidth);
- assert(OrigTy->getPrimitiveSizeInBits().getFixedSize() <
- ExtTy->getPrimitiveSizeInBits().getFixedSize() &&
- "Original type not smaller than extended type");
}
void Mutate();
@@ -142,8 +136,8 @@ class TypePromotion : public FunctionPass {
unsigned TypeSize = 0;
LLVMContext *Ctx = nullptr;
unsigned RegisterBitWidth = 0;
- SmallPtrSet<Value*, 16> AllVisited;
- SmallPtrSet<Instruction*, 8> SafeToPromote;
+ SmallPtrSet<Value *, 16> AllVisited;
+ SmallPtrSet<Instruction *, 8> SafeToPromote;
SmallPtrSet<Instruction *, 4> SafeWrap;
// Does V have the same size result type as TypeSize?
@@ -190,7 +184,7 @@ public:
bool runOnFunction(Function &F) override;
};
-}
+} // namespace
static bool GenerateSignBits(Instruction *I) {
unsigned Opc = I->getOpcode();
@@ -245,7 +239,7 @@ bool TypePromotion::isSource(Value *V) {
bool TypePromotion::isSink(Value *V) {
// TODO The truncate also isn't actually necessary because we have already
// proved that the data value is kept within the range of the original data
- // type.
+ // type. We currently remove any truncs inserted for handling zext sinks.
// Sinks are:
// - points where the value in the register is being observed, such as an
@@ -269,7 +263,7 @@ bool TypePromotion::isSink(Value *V) {
/// Return whether this instruction can safely wrap.
bool TypePromotion::isSafeWrap(Instruction *I) {
- // We can support a, potentially, wrapping instruction (I) if:
+ // We can support a potentially wrapping instruction (I) if:
// - It is only used by an unsigned icmp.
// - The icmp uses a constant.
// - The wrapping value (I) is decreasing, i.e. would underflow - wrapping
@@ -356,7 +350,7 @@ bool TypePromotion::isSafeWrap(Instruction *I) {
if (!OverflowConst.isNonPositive())
return false;
- // Using C1 = OverflowConst and C2 = ICmpConst, we can use either prove that:
+ // Using C1 = OverflowConst and C2 = ICmpConst, we can either prove that:
// zext(x) + sext(C1) <u zext(C2) if C1 < 0 and C1 >s C2
// zext(x) + sext(C1) <u sext(C2) if C1 < 0 and C1 <=s C2
if (OverflowConst.sgt(ICmpConst)) {
@@ -404,7 +398,7 @@ static bool isPromotedResultSafe(Instruction *I) {
}
void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {
- SmallVector<Instruction*, 4> Users;
+ SmallVector<Instruction *, 4> Users;
Instruction *InstTo = dyn_cast<Instruction>(To);
bool ReplacedAll = true;
@@ -485,12 +479,18 @@ void IRPromoter::PromoteTree() {
continue;
if (auto *Const = dyn_cast<ConstantInt>(Op)) {
- Constant *NewConst = SafeWrap.contains(I)
+ // For subtract, we don't need to sext the constant. We only put it in
+ // SafeWrap because SafeWrap.size() is used elsewhere.
+ // For cmp, we need to sign extend a constant appearing in either
+ // operand. For add, we should only sign extend the RHS.
+ Constant *NewConst = (SafeWrap.contains(I) &&
+ (I->getOpcode() == Instruction::ICmp || i == 1) &&
+ I->getOpcode() != Instruction::Sub)
? ConstantExpr::getSExt(Const, ExtTy)
: ConstantExpr::getZExt(Const, ExtTy);
I->setOperand(i, NewConst);
} else if (isa<UndefValue>(Op))
- I->setOperand(i, UndefValue::get(ExtTy));
+ I->setOperand(i, ConstantInt::get(ExtTy, 0));
}
// Mutate the result type, unless this is an icmp or switch.
@@ -506,7 +506,7 @@ void IRPromoter::TruncateSinks() {
IRBuilder<> Builder{Ctx};
- auto InsertTrunc = [&](Value *V, Type *TruncTy) -> Instruction* {
+ auto InsertTrunc = [&](Value *V, Type *TruncTy) -> Instruction * {
if (!isa<Instruction>(V) || !isa<IntegerType>(V->getType()))
return nullptr;
@@ -514,7 +514,7 @@ void IRPromoter::TruncateSinks() {
return nullptr;
LLVM_DEBUG(dbgs() << "IR Promotion: Creating " << *TruncTy << " Trunc for "
- << *V << "\n");
+ << *V << "\n");
Builder.SetInsertPoint(cast<Instruction>(V));
auto *Trunc = dyn_cast<Instruction>(Builder.CreateTrunc(V, TruncTy));
if (Trunc)
@@ -550,6 +550,11 @@ void IRPromoter::TruncateSinks() {
continue;
}
+ // Don't insert a trunc for a zext which can still legally promote.
+ if (auto ZExt = dyn_cast<ZExtInst>(I))
+ if (ZExt->getType()->getScalarSizeInBits() > PromotedWidth)
+ continue;
+
// Now handle the others.
for (unsigned i = 0; i < I->getNumOperands(); ++i) {
Type *Ty = TruncTysMap[I][i];
@@ -576,16 +581,14 @@ void IRPromoter::Cleanup() {
Value *Src = ZExt->getOperand(0);
if (ZExt->getSrcTy() == ZExt->getDestTy()) {
LLVM_DEBUG(dbgs() << "IR Promotion: Removing unnecessary cast: " << *ZExt
- << "\n");
+ << "\n");
ReplaceAllUsersOfWith(ZExt, Src);
continue;
}
- // Unless they produce a value that is narrower than ExtTy, we can
- // replace the result of the zext with the input of a newly inserted
- // trunc.
- if (NewInsts.count(Src) && isa<TruncInst>(Src) &&
- Src->getType() == OrigTy) {
+ // We've inserted a trunc for a zext sink, but we already know that the
+ // input is in range, negating the need for the trunc.
+ if (NewInsts.count(Src) && isa<TruncInst>(Src)) {
auto *Trunc = cast<TruncInst>(Src);
assert(Trunc->getOperand(0)->getType() == ExtTy &&
"expected inserted trunc to be operating on i32");
@@ -615,7 +618,7 @@ void IRPromoter::ConvertTruncs() {
unsigned NumBits = DestTy->getScalarSizeInBits();
ConstantInt *Mask =
- ConstantInt::get(SrcTy, APInt::getMaxValue(NumBits).getZExtValue());
+ ConstantInt::get(SrcTy, APInt::getMaxValue(NumBits).getZExtValue());
Value *Masked = Builder.CreateAnd(Trunc->getOperand(0), Mask);
if (auto *I = dyn_cast<Instruction>(Masked))
@@ -626,8 +629,8 @@ void IRPromoter::ConvertTruncs() {
}
void IRPromoter::Mutate() {
- LLVM_DEBUG(dbgs() << "IR Promotion: Promoting use-def chains from "
- << OrigTy->getBitWidth() << " to " << PromotedWidth << "-bits\n");
+ LLVM_DEBUG(dbgs() << "IR Promotion: Promoting use-def chains to "
+ << PromotedWidth << "-bits\n");
// Cache original types of the values that will likely need truncating
for (auto *I : Sinks) {
@@ -677,8 +680,7 @@ bool TypePromotion::isSupportedType(Value *V) {
if (Ty->isVoidTy() || Ty->isPointerTy())
return true;
- if (!isa<IntegerType>(Ty) ||
- cast<IntegerType>(Ty)->getBitWidth() == 1 ||
+ if (!isa<IntegerType>(Ty) || cast<IntegerType>(Ty)->getBitWidth() == 1 ||
cast<IntegerType>(Ty)->getBitWidth() > RegisterBitWidth)
return false;
@@ -738,13 +740,12 @@ bool TypePromotion::isSupportedValue(Value *V) {
/// smaller than the targeted promoted type. Check that we're not trying to
/// promote something larger than our base 'TypeSize' type.
bool TypePromotion::isLegalToPromote(Value *V) {
-
auto *I = dyn_cast<Instruction>(V);
if (!I)
return true;
if (SafeToPromote.count(I))
- return true;
+ return true;
if (isPromotedResultSafe(I) || isSafeWrap(I)) {
SafeToPromote.insert(I);
@@ -765,10 +766,10 @@ bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) {
LLVM_DEBUG(dbgs() << "IR Promotion: TryToPromote: " << *V << ", from "
<< TypeSize << " bits to " << PromotedWidth << "\n");
- SetVector<Value*> WorkList;
- SetVector<Value*> Sources;
- SetVector<Instruction*> Sinks;
- SetVector<Value*> CurrentVisited;
+ SetVector<Value *> WorkList;
+ SetVector<Value *> Sources;
+ SetVector<Instruction *> Sinks;
+ SetVector<Value *> CurrentVisited;
WorkList.insert(V);
// Return true if V was added to the worklist as a supported instruction,
@@ -839,14 +840,15 @@ bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) {
}
}
- LLVM_DEBUG(dbgs() << "IR Promotion: Visited nodes:\n";
- for (auto *I : CurrentVisited)
- I->dump();
- );
+ LLVM_DEBUG({
+ dbgs() << "IR Promotion: Visited nodes:\n";
+ for (auto *I : CurrentVisited)
+ I->dump();
+ });
unsigned ToPromote = 0;
unsigned NonFreeArgs = 0;
- SmallPtrSet<BasicBlock*, 4> Blocks;
+ SmallPtrSet<BasicBlock *, 4> Blocks;
for (auto *V : CurrentVisited) {
if (auto *I = dyn_cast<Instruction>(V))
Blocks.insert(I->getParent());
@@ -860,16 +862,16 @@ bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) {
if (Sinks.count(cast<Instruction>(V)))
continue;
- ++ToPromote;
- }
+ ++ToPromote;
+ }
// DAG optimizations should be able to handle these cases better, especially
// for function arguments.
if (ToPromote < 2 || (Blocks.size() == 1 && (NonFreeArgs > SafeWrap.size())))
return false;
- IRPromoter Promoter(*Ctx, cast<IntegerType>(OrigTy), PromotedWidth,
- CurrentVisited, Sources, Sinks, SafeWrap);
+ IRPromoter Promoter(*Ctx, PromotedWidth, CurrentVisited, Sources, Sinks,
+ SafeWrap);
Promoter.Mutate();
return true;
}
@@ -893,14 +895,14 @@ bool TypePromotion::runOnFunction(Function &F) {
const TargetSubtargetInfo *SubtargetInfo = TM.getSubtargetImpl(F);
const TargetLowering *TLI = SubtargetInfo->getTargetLowering();
const TargetTransformInfo &TII =
- getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
RegisterBitWidth =
TII.getRegisterBitWidth(TargetTransformInfo::RGK_Scalar).getFixedSize();
Ctx = &F.getParent()->getContext();
// Search up from icmps to try to promote their operands.
for (BasicBlock &BB : F) {
- for (auto &I : BB) {
+ for (Instruction &I : BB) {
if (AllVisited.count(&I))
continue;
@@ -909,8 +911,7 @@ bool TypePromotion::runOnFunction(Function &F) {
auto *ICmp = cast<ICmpInst>(&I);
// Skip signed or pointer compares
- if (ICmp->isSigned() ||
- !isa<IntegerType>(ICmp->getOperand(0)->getType()))
+ if (ICmp->isSigned() || !isa<IntegerType>(ICmp->getOperand(0)->getType()))
continue;
LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: " << *ICmp << "\n");
@@ -921,13 +922,13 @@ bool TypePromotion::runOnFunction(Function &F) {
if (SrcVT.isSimple() && TLI->isTypeLegal(SrcVT.getSimpleVT()))
break;
- if (TLI->getTypeAction(ICmp->getContext(), SrcVT) !=
+ if (TLI->getTypeAction(*Ctx, SrcVT) !=
TargetLowering::TypePromoteInteger)
break;
- EVT PromotedVT = TLI->getTypeToTransformTo(ICmp->getContext(), SrcVT);
+ EVT PromotedVT = TLI->getTypeToTransformTo(*Ctx, SrcVT);
if (RegisterBitWidth < PromotedVT.getFixedSizeInBits()) {
LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target register "
- << "for promoted type\n");
+ << "for promoted type\n");
break;
}
@@ -936,13 +937,7 @@ bool TypePromotion::runOnFunction(Function &F) {
}
}
}
- LLVM_DEBUG(if (verifyFunction(F, &dbgs())) {
-    dbgs() << F;
-    report_fatal_error("Broken function after type promotion");
-  });
}
- if (MadeChange)
- LLVM_DEBUG(dbgs() << "After TypePromotion: " << F << "\n");
AllVisited.clear();
SafeToPromote.clear();
@@ -956,6 +951,4 @@ INITIALIZE_PASS_END(TypePromotion, DEBUG_TYPE, PASS_NAME, false, false)
char TypePromotion::ID = 0;
-FunctionPass *llvm::createTypePromotionPass() {
- return new TypePromotion();
-}
+FunctionPass *llvm::createTypePromotionPass() { return new TypePromotion(); }
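
The reformatted factory at the end of the file is standard legacy-pass boilerplate. A generic sketch under assumed names (ExamplePass and createExamplePass are illustrative, not LLVM APIs): the pass class stays in an anonymous namespace and only the factory function is exported.

```cpp
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
using namespace llvm;

namespace {
class ExamplePass : public FunctionPass {
public:
  static char ID;
  ExamplePass() : FunctionPass(ID) {}
  bool runOnFunction(Function &F) override { return false; } // no-op sketch
};
} // namespace

char ExamplePass::ID = 0;

FunctionPass *createExamplePass() { return new ExamplePass(); }
```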
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp
index 3426a03b6083..5e8514f525e9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp
@@ -26,16 +26,10 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/Constant.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -131,8 +125,8 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
for (unsigned i = start->getNumOperands() - 1; i >= 2; i-=2)
if (start->getOperand(i).isMBB() &&
start->getOperand(i).getMBB() == &BB) {
- start->RemoveOperand(i);
- start->RemoveOperand(i-1);
+ start->removeOperand(i);
+ start->removeOperand(i-1);
}
start++;
@@ -162,8 +156,8 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
while (phi != BB.end() && phi->isPHI()) {
for (unsigned i = phi->getNumOperands() - 1; i >= 2; i-=2)
if (!preds.count(phi->getOperand(i).getMBB())) {
- phi->RemoveOperand(i);
- phi->RemoveOperand(i-1);
+ phi->removeOperand(i);
+ phi->removeOperand(i-1);
ModifiedPHI = true;
}
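
Both hunks in this file are the mechanical RemoveOperand -> removeOperand rename applied to the same idiom. A sketch of that idiom, assuming MI (a PHI MachineInstr) and Preds (a set of live predecessor blocks) are in scope: machine PHIs store a def followed by (value, predecessor-MBB) operand pairs, so both members of a pair are removed together, walking backwards so the remaining indices stay valid.

```cpp
for (unsigned i = MI->getNumOperands() - 1; i >= 2; i -= 2) {
  if (!Preds.count(MI->getOperand(i).getMBB())) {
    MI->removeOperand(i);     // predecessor-block operand
    MI->removeOperand(i - 1); // paired incoming-value operand
  }
}
```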
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
index 5f59cb4643f2..8b5b585090f5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
@@ -27,7 +27,6 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -43,19 +42,18 @@ using namespace llvm;
#define DEBUG_TYPE "machine-scheduler"
static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure", cl::Hidden,
- cl::ZeroOrMore, cl::init(false));
+ cl::init(false));
static cl::opt<bool> UseNewerCandidate("use-newer-candidate", cl::Hidden,
- cl::ZeroOrMore, cl::init(true));
+ cl::init(true));
static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
- cl::Hidden, cl::ZeroOrMore,
- cl::init(1));
+ cl::Hidden, cl::init(1));
// Check if the scheduler should penalize instructions that are available too
// early due to a zero-latency dependence.
static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
- cl::ZeroOrMore, cl::init(true));
+ cl::init(true));
// This value is used to determine if a register class is a high pressure set.
// We compute the maximum number of registers needed and divide it by the total
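
The cl::ZeroOrMore deletions track a CommandLine change in this LLVM import where options accept zero or more occurrences by default, making the explicit flag redundant. A minimal sketch with a hypothetical option name:

```cpp
#include "llvm/Support/CommandLine.h"
using namespace llvm;

// Occurrences default to ZeroOrMore, so only cl::Hidden and the initial
// value still need to be spelled out.
static cl::opt<bool> ExampleIgnorePressure("example-ignore-bb-reg-pressure",
                                           cl::Hidden, cl::init(false));
```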
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp
index 0c42bef82005..f577aff39ea7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp
@@ -12,6 +12,7 @@
#include "llvm/IR/Type.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TypeSize.h"
+#include "llvm/Support/WithColor.h"
using namespace llvm;
EVT EVT::changeExtendedTypeToInteger() const {
@@ -179,19 +180,22 @@ std::string EVT::getEVTString() const {
/// specified EVT. For integer types, this returns an unsigned type. Note
/// that this will abort for types that cannot be represented.
Type *EVT::getTypeForEVT(LLVMContext &Context) const {
+ // clang-format off
switch (V.SimpleTy) {
default:
assert(isExtended() && "Type is not extended!");
return LLVMTy;
case MVT::isVoid: return Type::getVoidTy(Context);
case MVT::i1: return Type::getInt1Ty(Context);
+ case MVT::i2: return Type::getIntNTy(Context, 2);
+ case MVT::i4: return Type::getIntNTy(Context, 4);
case MVT::i8: return Type::getInt8Ty(Context);
case MVT::i16: return Type::getInt16Ty(Context);
case MVT::i32: return Type::getInt32Ty(Context);
case MVT::i64: return Type::getInt64Ty(Context);
case MVT::i128: return IntegerType::get(Context, 128);
case MVT::f16: return Type::getHalfTy(Context);
- case MVT::bf16: return Type::getBFloatTy(Context);
+ case MVT::bf16: return Type::getBFloatTy(Context);
case MVT::f32: return Type::getFloatTy(Context);
case MVT::f64: return Type::getDoubleTy(Context);
case MVT::f80: return Type::getX86_FP80Ty(Context);
@@ -228,6 +232,10 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
return FixedVectorType::get(Type::getInt1Ty(Context), 512);
case MVT::v1024i1:
return FixedVectorType::get(Type::getInt1Ty(Context), 1024);
+ case MVT::v128i2:
+ return FixedVectorType::get(Type::getIntNTy(Context, 2), 128);
+ case MVT::v64i4:
+ return FixedVectorType::get(Type::getIntNTy(Context, 4), 64);
case MVT::v1i8:
return FixedVectorType::get(Type::getInt8Ty(Context), 1);
case MVT::v2i8:
@@ -500,6 +508,10 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
return ScalableVectorType::get(Type::getBFloatTy(Context), 4);
case MVT::nxv8bf16:
return ScalableVectorType::get(Type::getBFloatTy(Context), 8);
+ case MVT::nxv16bf16:
+ return ScalableVectorType::get(Type::getBFloatTy(Context), 16);
+ case MVT::nxv32bf16:
+ return ScalableVectorType::get(Type::getBFloatTy(Context), 32);
case MVT::nxv1f32:
return ScalableVectorType::get(Type::getFloatTy(Context), 1);
case MVT::nxv2f32:
@@ -520,6 +532,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
return ScalableVectorType::get(Type::getDoubleTy(Context), 8);
case MVT::Metadata: return Type::getMetadataTy(Context);
}
+ // clang-format on
}
/// Return the value type corresponding to the specified type. This returns all
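
getTypeForEVT maps a simple value type back to the corresponding IR type; the new i2/i4 and bf16 scalable-vector cases extend that table. A usage sketch, assuming an LLVMContext is available:

```cpp
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
using namespace llvm;

Type *lowerToIRType(LLVMContext &Ctx) {
  Type *I2Ty = EVT(MVT::i2).getTypeForEVT(Ctx);   // Type::getIntNTy(Ctx, 2)
  Type *I32Ty = EVT(MVT::i32).getTypeForEVT(Ctx); // Type::getInt32Ty(Ctx)
  (void)I2Ty;
  return I32Ty;
}
```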
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
index c04a7b28eff9..aa6645227edb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
@@ -77,8 +77,8 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/TargetLowering.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/WasmEHFuncInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
@@ -212,9 +212,15 @@ bool WasmEHPrepare::prepareEHPads(Function &F) {
assert(F.hasPersonalityFn() && "Personality function not found");
- // __wasm_lpad_context global variable
+ // __wasm_lpad_context global variable.
+  // This variable should be thread local. If the target does not support TLS,
+  // we depend on CoalesceFeaturesAndStripAtomics to downgrade it to a
+  // non-thread-local one, in which case we don't allow this object to be
+  // linked with other objects using shared memory.
LPadContextGV = cast<GlobalVariable>(
M.getOrInsertGlobal("__wasm_lpad_context", LPadContextTy));
+ LPadContextGV->setThreadLocalMode(GlobalValue::GeneralDynamicTLSModel);
+
LPadIndexField = IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 0,
"lpad_index_gep");
LSDAField =
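
The new lines make __wasm_lpad_context thread-local so concurrent unwinds don't race on a shared context. A hedged sketch of the pattern (getLPadContextGV is a hypothetical helper; LPadContextTy stands in for whatever struct type the pass builds):

```cpp
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
using namespace llvm;

GlobalVariable *getLPadContextGV(Module &M, StructType *LPadContextTy) {
  // Create or find the global, then mark it thread-local so each thread
  // sees its own landing-pad context.
  auto *GV = cast<GlobalVariable>(
      M.getOrInsertGlobal("__wasm_lpad_context", LPadContextTy));
  GV->setThreadLocalMode(GlobalValue::GeneralDynamicTLSModel);
  return GV;
}
```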
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp
index d31183e46d65..b835503ee9ed 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -19,14 +19,14 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
-#include "llvm/MC/MCSymbol.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -1256,4 +1256,4 @@ void WinEHFuncInfo::addIPToStateRange(const InvokeInst *II,
LabelToStateMap[InvokeBegin] = std::make_pair(InvokeStateMap[II], InvokeEnd);
}
-WinEHFuncInfo::WinEHFuncInfo() {}
+WinEHFuncInfo::WinEHFuncInfo() = default;
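
The final hunk swaps an empty constructor body for a defaulted definition, the form clang-tidy's modernize-use-equals-default prefers. A minimal illustration with a hypothetical type:

```cpp
struct FuncInfoLike {
  FuncInfoLike(); // declared in the header
};

// Defined in the .cpp: `= default` documents that there is no custom logic.
FuncInfoLike::FuncInfoLike() = default;
```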