Diffstat (limited to 'contrib/llvm-project/llvm/lib')
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp201
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/DivergenceAnalysis.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/IRSimilarityIdentifier.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp46
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp184
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/IntervalPartition.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp60
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/MemoryLocation.cpp22
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/PHITransAddr.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/RegionPass.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp363
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp126
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/SyncDependenceAnalysis.cpp59
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp19
-rw-r--r--contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/AsmParser/LLParser.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Bitcode/Reader/MetadataLoader.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp28
-rw-r--r--contrib/llvm-project/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp26
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp213
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp134
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h8
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp619
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp36
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp27
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LatencyPriorityQueue.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp70
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h63
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp51
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveRangeUtils.h2
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp24
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp24
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp119
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp131
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp50
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp109
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp102
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp29
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp35
-rw-r--r--contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp80
-rw-r--r--contrib/llvm-project/llvm/lib/Demangle/DLangDemangle.cpp253
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/ExecutionEngine.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h40
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Core.cpp234
-rw-r--r--contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/IR/AsmWriter.cpp26
-rw-r--r--contrib/llvm-project/llvm/lib/IR/Core.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/IR/DIBuilder.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/IR/Instructions.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/IR/IntrinsicInst.cpp67
-rw-r--r--contrib/llvm-project/llvm/lib/IR/Operator.cpp21
-rw-r--r--contrib/llvm-project/llvm/lib/IR/PassTimingInfo.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/IR/SafepointIRVerifier.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/IR/Verifier.cpp31
-rw-r--r--contrib/llvm-project/llvm/lib/InterfaceStub/ELFObjHandler.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCAsmStreamer.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCELFStreamer.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/MC/MCParser/ELFAsmParser.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/MC/WinCOFFObjectWriter.cpp48
-rw-r--r--contrib/llvm-project/llvm/lib/MCA/InstrBuilder.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/MCA/Stages/ExecuteStage.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Object/ELFObjectFile.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/ObjectYAML/COFFEmitter.cpp32
-rw-r--r--contrib/llvm-project/llvm/lib/ObjectYAML/ELFYAML.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/ObjectYAML/MachOEmitter.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/ObjectYAML/MachOYAML.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Option/OptTable.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Passes/PassBuilderPipelines.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/ProfileData/InstrProf.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/ProfileData/RawMemProfReader.cpp121
-rw-r--r--contrib/llvm-project/llvm/lib/ProfileData/SampleProfReader.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Support/AArch64TargetParser.cpp50
-rw-r--r--contrib/llvm-project/llvm/lib/Support/ARMAttributeParser.cpp26
-rw-r--r--contrib/llvm-project/llvm/lib/Support/ARMBuildAttrs.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Support/CommandLine.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Support/HTTPClient.cpp97
-rw-r--r--contrib/llvm-project/llvm/lib/Support/KnownBits.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Support/Regex.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Support/StringExtras.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Support/StringRef.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Support/TargetParser.cpp48
-rw-r--r--contrib/llvm-project/llvm/lib/Support/ThreadPool.cpp29
-rw-r--r--contrib/llvm-project/llvm/lib/TableGen/TGLexer.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp297
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp57
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombine.td11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp41
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp49
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td29
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp46
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp22
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/CaymanInstructions.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/EvergreenInstructions.td1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp23
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Instructions.td1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Packetizer.cpp19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIDefines.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp28
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp80
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp67
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td176
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp38
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp155
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.h29
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td135
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SISchedule.td6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARC/ARCMCInstLower.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARM.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARM.td4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.cpp48
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp126
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.h24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMBranchTargets.cpp135
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp92
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp206
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrFormats.td29
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.td8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrMVE.td60
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb2.td138
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp20
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp57
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h25
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMPredicates.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterInfo.td10
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMSystemRegister.td18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp33
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp17
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp67
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h24
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp13
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/BPF/BPFMCInstLower.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/BitTracker.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp23
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp30
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp31
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenMux.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp23
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp22
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPeephole.cpp18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiDelaySlotFiller.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiFrameLowering.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Lanai/LanaiMCInstLower.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/Mips16FrameLowering.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp36
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsExpandPseudo.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsFrameLowering.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsMCInstLower.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp54
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/P10InstrResources.td5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/P9InstrResources.td6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.td18
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp11
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp30
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp41
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp150
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp34
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrAltivec.td39
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrVSX.td5
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp73
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.def75
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.h8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp91
-rw-r--r--contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp16
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp137
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td1
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp28
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.h3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSystemOperands.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/DelaySlotFiller.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/LeonPasses.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcFrameLowering.cpp15
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Sparc/SparcMCInstLower.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp36
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.h14
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86DomainReassignment.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86DynAllocaExpander.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ExpandPseudo.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FixupLEAs.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FloatingPoint.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp66
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.h19
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp126
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86RegisterBanks.td2
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp251
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreFrameLowering.cpp8
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Target/XCore/XCoreMCInstLower.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp132
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp37
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/PartialInlining.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp41
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp90
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp69
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h1
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp23
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp66
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp54
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp127
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp67
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPassManager.cpp14
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp5
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp189
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp9
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp71
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp18
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp12
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/GuardUtils.cpp2
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp7
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp57
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp6
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp3
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp462
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp4
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp68
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp52
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp980
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp612
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp20
-rw-r--r--contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h23
360 files changed, 8762 insertions, 4120 deletions
diff --git a/contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp
index 33fdc8b628c5..856d7e90acb2 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -104,12 +104,67 @@ static const uint32_t LBH_NONTAKEN_WEIGHT = 4;
/// All reachable probabilities will proportionally share the remaining part.
static const BranchProbability UR_TAKEN_PROB = BranchProbability::getRaw(1);
+/// Heuristics and lookup tables for non-loop branches:
+/// Pointer Heuristics (PH)
static const uint32_t PH_TAKEN_WEIGHT = 20;
static const uint32_t PH_NONTAKEN_WEIGHT = 12;
+static const BranchProbability
+ PtrTakenProb(PH_TAKEN_WEIGHT, PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT);
+static const BranchProbability
+ PtrUntakenProb(PH_NONTAKEN_WEIGHT, PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT);
+
+using ProbabilityList = SmallVector<BranchProbability>;
+using ProbabilityTable = std::map<CmpInst::Predicate, ProbabilityList>;
+
+/// Pointer comparisons:
+static const ProbabilityTable PointerTable{
+ {ICmpInst::ICMP_NE, {PtrTakenProb, PtrUntakenProb}}, /// p != q -> Likely
+ {ICmpInst::ICMP_EQ, {PtrUntakenProb, PtrTakenProb}}, /// p == q -> Unlikely
+};
+/// Zero Heuristics (ZH)
static const uint32_t ZH_TAKEN_WEIGHT = 20;
static const uint32_t ZH_NONTAKEN_WEIGHT = 12;
+static const BranchProbability
+ ZeroTakenProb(ZH_TAKEN_WEIGHT, ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT);
+static const BranchProbability
+ ZeroUntakenProb(ZH_NONTAKEN_WEIGHT, ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT);
+
+/// Integer compares with 0:
+static const ProbabilityTable ICmpWithZeroTable{
+ {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}}, /// X == 0 -> Unlikely
+ {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}}, /// X != 0 -> Likely
+ {CmpInst::ICMP_SLT, {ZeroUntakenProb, ZeroTakenProb}}, /// X < 0 -> Unlikely
+ {CmpInst::ICMP_SGT, {ZeroTakenProb, ZeroUntakenProb}}, /// X > 0 -> Likely
+};
+
+/// Integer compares with -1:
+static const ProbabilityTable ICmpWithMinusOneTable{
+ {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}}, /// X == -1 -> Unlikely
+ {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}}, /// X != -1 -> Likely
+ // InstCombine canonicalizes X >= 0 into X > -1
+ {CmpInst::ICMP_SGT, {ZeroTakenProb, ZeroUntakenProb}}, /// X >= 0 -> Likely
+};
+
+/// Integer compares with 1:
+static const ProbabilityTable ICmpWithOneTable{
+ // InstCombine canonicalizes X <= 0 into X < 1
+ {CmpInst::ICMP_SLT, {ZeroUntakenProb, ZeroTakenProb}}, /// X <= 0 -> Unlikely
+};
+
+/// strcmp and similar functions return zero, negative, or positive, if the
+/// first string is equal, less, or greater than the second. We consider it
+/// likely that the strings are not equal, so a comparison with zero is
+/// probably false, and a comparison with any other number is also probably
+/// false, given that what exactly is returned for nonzero values is not
+/// specified. For any comparison other than equality, we know nothing.
+static const ProbabilityTable ICmpWithLibCallTable{
+ {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}},
+ {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}},
+};
+// Floating-Point Heuristics (FPH)
static const uint32_t FPH_TAKEN_WEIGHT = 20;
static const uint32_t FPH_NONTAKEN_WEIGHT = 12;
@@ -120,6 +175,21 @@ static const uint32_t FPH_ORD_WEIGHT = 1024 * 1024 - 1;
/// exceptional case, so the result is unlikely.
static const uint32_t FPH_UNO_WEIGHT = 1;
+static const BranchProbability FPOrdTakenProb(FPH_ORD_WEIGHT,
+ FPH_ORD_WEIGHT + FPH_UNO_WEIGHT);
+static const BranchProbability
+ FPOrdUntakenProb(FPH_UNO_WEIGHT, FPH_ORD_WEIGHT + FPH_UNO_WEIGHT);
+static const BranchProbability
+ FPTakenProb(FPH_TAKEN_WEIGHT, FPH_TAKEN_WEIGHT + FPH_NONTAKEN_WEIGHT);
+static const BranchProbability
+ FPUntakenProb(FPH_NONTAKEN_WEIGHT, FPH_TAKEN_WEIGHT + FPH_NONTAKEN_WEIGHT);
+
+/// Floating-Point compares:
+static const ProbabilityTable FCmpTable{
+ {FCmpInst::FCMP_ORD, {FPOrdTakenProb, FPOrdUntakenProb}}, /// !isnan -> Likely
+ {FCmpInst::FCMP_UNO, {FPOrdUntakenProb, FPOrdTakenProb}}, /// isnan -> Unlikely
+};
+
/// Set of dedicated "absolute" execution weights for a block. These weights are
/// meaningful relative to each other and their derivatives only.
enum class BlockExecWeight : std::uint32_t {
@@ -468,21 +538,10 @@ bool BranchProbabilityInfo::calcPointerHeuristics(const BasicBlock *BB) {
assert(CI->getOperand(1)->getType()->isPointerTy());
- BranchProbability TakenProb(PH_TAKEN_WEIGHT,
- PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT);
- BranchProbability UntakenProb(PH_NONTAKEN_WEIGHT,
- PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT);
-
- // p != 0 -> isProb = true
- // p == 0 -> isProb = false
- // p != q -> isProb = true
- // p == q -> isProb = false;
- bool isProb = CI->getPredicate() == ICmpInst::ICMP_NE;
- if (!isProb)
- std::swap(TakenProb, UntakenProb);
-
- setEdgeProbability(
- BB, SmallVector<BranchProbability, 2>({TakenProb, UntakenProb}));
+ auto Search = PointerTable.find(CI->getPredicate());
+ if (Search == PointerTable.end())
+ return false;
+ setEdgeProbability(BB, Search->second);
return true;
}
@@ -949,86 +1008,33 @@ bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB,
if (Function *CalledFn = Call->getCalledFunction())
TLI->getLibFunc(*CalledFn, Func);
- bool isProb;
+ ProbabilityTable::const_iterator Search;
if (Func == LibFunc_strcasecmp ||
Func == LibFunc_strcmp ||
Func == LibFunc_strncasecmp ||
Func == LibFunc_strncmp ||
Func == LibFunc_memcmp ||
Func == LibFunc_bcmp) {
- // strcmp and similar functions return zero, negative, or positive, if the
- // first string is equal, less, or greater than the second. We consider it
- // likely that the strings are not equal, so a comparison with zero is
- // probably false, but also a comparison with any other number is also
- // probably false given that what exactly is returned for nonzero values is
- // not specified. Any kind of comparison other than equality we know
- // nothing about.
- switch (CI->getPredicate()) {
- case CmpInst::ICMP_EQ:
- isProb = false;
- break;
- case CmpInst::ICMP_NE:
- isProb = true;
- break;
- default:
+ Search = ICmpWithLibCallTable.find(CI->getPredicate());
+ if (Search == ICmpWithLibCallTable.end())
return false;
- }
} else if (CV->isZero()) {
- switch (CI->getPredicate()) {
- case CmpInst::ICMP_EQ:
- // X == 0 -> Unlikely
- isProb = false;
- break;
- case CmpInst::ICMP_NE:
- // X != 0 -> Likely
- isProb = true;
- break;
- case CmpInst::ICMP_SLT:
- // X < 0 -> Unlikely
- isProb = false;
- break;
- case CmpInst::ICMP_SGT:
- // X > 0 -> Likely
- isProb = true;
- break;
- default:
+ Search = ICmpWithZeroTable.find(CI->getPredicate());
+ if (Search == ICmpWithZeroTable.end())
+ return false;
+ } else if (CV->isOne()) {
+ Search = ICmpWithOneTable.find(CI->getPredicate());
+ if (Search == ICmpWithOneTable.end())
return false;
- }
- } else if (CV->isOne() && CI->getPredicate() == CmpInst::ICMP_SLT) {
- // InstCombine canonicalizes X <= 0 into X < 1.
- // X <= 0 -> Unlikely
- isProb = false;
} else if (CV->isMinusOne()) {
- switch (CI->getPredicate()) {
- case CmpInst::ICMP_EQ:
- // X == -1 -> Unlikely
- isProb = false;
- break;
- case CmpInst::ICMP_NE:
- // X != -1 -> Likely
- isProb = true;
- break;
- case CmpInst::ICMP_SGT:
- // InstCombine canonicalizes X >= 0 into X > -1.
- // X >= 0 -> Likely
- isProb = true;
- break;
- default:
+ Search = ICmpWithMinusOneTable.find(CI->getPredicate());
+ if (Search == ICmpWithMinusOneTable.end())
return false;
- }
} else {
return false;
}
- BranchProbability TakenProb(ZH_TAKEN_WEIGHT,
- ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT);
- BranchProbability UntakenProb(ZH_NONTAKEN_WEIGHT,
- ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT);
- if (!isProb)
- std::swap(TakenProb, UntakenProb);
-
- setEdgeProbability(
- BB, SmallVector<BranchProbability, 2>({TakenProb, UntakenProb}));
+ setEdgeProbability(BB, Search->second);
return true;
}
@@ -1042,34 +1048,21 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) {
if (!FCmp)
return false;
- uint32_t TakenWeight = FPH_TAKEN_WEIGHT;
- uint32_t NontakenWeight = FPH_NONTAKEN_WEIGHT;
- bool isProb;
+ ProbabilityList ProbList;
if (FCmp->isEquality()) {
- // f1 == f2 -> Unlikely
- // f1 != f2 -> Likely
- isProb = !FCmp->isTrueWhenEqual();
- } else if (FCmp->getPredicate() == FCmpInst::FCMP_ORD) {
- // !isnan -> Likely
- isProb = true;
- TakenWeight = FPH_ORD_WEIGHT;
- NontakenWeight = FPH_UNO_WEIGHT;
- } else if (FCmp->getPredicate() == FCmpInst::FCMP_UNO) {
- // isnan -> Unlikely
- isProb = false;
- TakenWeight = FPH_ORD_WEIGHT;
- NontakenWeight = FPH_UNO_WEIGHT;
+ ProbList = !FCmp->isTrueWhenEqual() ?
+ // f1 != f2 -> Likely
+ ProbabilityList({FPTakenProb, FPUntakenProb}) :
+ // f1 == f2 -> Unlikely
+ ProbabilityList({FPUntakenProb, FPTakenProb});
} else {
- return false;
+ auto Search = FCmpTable.find(FCmp->getPredicate());
+ if (Search == FCmpTable.end())
+ return false;
+ ProbList = Search->second;
}
- BranchProbability TakenProb(TakenWeight, TakenWeight + NontakenWeight);
- BranchProbability UntakenProb(NontakenWeight, TakenWeight + NontakenWeight);
- if (!isProb)
- std::swap(TakenProb, UntakenProb);
-
- setEdgeProbability(
- BB, SmallVector<BranchProbability, 2>({TakenProb, UntakenProb}));
+ setEdgeProbability(BB, ProbList);
return true;
}
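As an aside on the pattern this change adopts: the per-predicate switch statements are replaced by static lookup tables keyed on the compare predicate, so each heuristic reduces to a single map lookup and unknown predicates simply fall out as "no heuristic applies". Below is a minimal standalone sketch of that pattern under simplifying assumptions; Predicate, Probability, and assignProbabilities are invented stand-ins for illustration, not LLVM's CmpInst::Predicate, BranchProbability, or the BranchProbabilityInfo API.

#include <cstdio>
#include <map>
#include <vector>

// Hypothetical stand-ins for the LLVM types used by the patch.
enum Predicate { ICMP_EQ, ICMP_NE, ICMP_SLT, ICMP_SGT };
struct Probability { unsigned Num, Denom; };
using ProbabilityList = std::vector<Probability>;
using ProbabilityTable = std::map<Predicate, ProbabilityList>;

// Same 20:12 weighting as ZH_TAKEN_WEIGHT / ZH_NONTAKEN_WEIGHT above.
static const ProbabilityTable ICmpWithZeroTable{
    {ICMP_EQ, {{12, 32}, {20, 32}}}, // X == 0 -> Unlikely
    {ICMP_NE, {{20, 32}, {12, 32}}}, // X != 0 -> Likely
};

// One table lookup replaces the per-predicate switch the patch removes.
bool assignProbabilities(Predicate Pred) {
  auto Search = ICmpWithZeroTable.find(Pred);
  if (Search == ICmpWithZeroTable.end())
    return false; // Unknown predicate: no heuristic applies.
  for (const Probability &P : Search->second)
    std::printf("%u/%u ", P.Num, P.Denom);
  std::printf("\n");
  return true;
}

int main() { return assignProbabilities(ICMP_NE) ? 0 : 1; }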
diff --git a/contrib/llvm-project/llvm/lib/Analysis/DivergenceAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/DivergenceAnalysis.cpp
index 3634526370f5..7426d0c07592 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/DivergenceAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/DivergenceAnalysis.cpp
@@ -24,12 +24,12 @@
// divergent can help the compiler to selectively run these optimizations.
//
// This implementation is derived from the Vectorization Analysis of the
-// Region Vectorizer (RV). That implementation in turn is based on the approach
-// described in
+// Region Vectorizer (RV). The analysis is based on the approach described in
//
-// Improving Performance of OpenCL on CPUs
-// Ralf Karrenberg and Sebastian Hack
-// CC '12
+// An abstract interpretation for SPMD divergence
+// on reducible control flow graphs.
+// Julian Rosemann, Simon Moll and Sebastian Hack
+// POPL '21
//
// This implementation is generic in the sense that it does
// not itself identify original sources of divergence.
diff --git a/contrib/llvm-project/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/contrib/llvm-project/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
index f22c6aa04f5e..2ec6cbeabda2 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
@@ -820,7 +820,7 @@ void IRSimilarityIdentifier::populateMapper(
/// subsequence from the \p InstrList, and create an IRSimilarityCandidate from
/// the IRInstructionData in subsequence.
///
-/// \param [in] Mapper - The instruction mapper for sanity checks.
+/// \param [in] Mapper - The instruction mapper for basic correctness checks.
/// \param [in] InstrList - The vector that holds the instruction data.
/// \param [in] IntegerMapping - The vector that holds the mapped integers.
/// \param [out] CandsForRepSubstring - The vector to store the generated
diff --git a/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp b/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp
index c4b7239b43ab..cfe910df4e91 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp
@@ -81,6 +81,7 @@ bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurKind Kind) {
case RecurKind::Mul:
case RecurKind::FAdd:
case RecurKind::FMul:
+ case RecurKind::FMulAdd:
return true;
}
return false;
@@ -194,21 +195,28 @@ static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit,
// vectorizing floating point operations without unsafe math.
static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst,
Instruction *Exit, PHINode *Phi) {
- // Currently only FAdd is supported
- if (Kind != RecurKind::FAdd)
+ // Currently only FAdd and FMulAdd are supported.
+ if (Kind != RecurKind::FAdd && Kind != RecurKind::FMulAdd)
return false;
- // Ensure the exit instruction is an FAdd, and that it only has one user
- // other than the reduction PHI
- if (Exit->getOpcode() != Instruction::FAdd || Exit->hasNUsesOrMore(3) ||
- Exit != ExactFPMathInst)
+ if (Kind == RecurKind::FAdd && Exit->getOpcode() != Instruction::FAdd)
+ return false;
+
+ if (Kind == RecurKind::FMulAdd &&
+ !RecurrenceDescriptor::isFMulAddIntrinsic(Exit))
+ return false;
+
+ // Ensure the exit instruction has only one user other than the reduction PHI
+ if (Exit != ExactFPMathInst || Exit->hasNUsesOrMore(3))
return false;
// The only pattern accepted is the one in which the reduction PHI
// is used as one of the operands of the exit instruction
- auto *LHS = Exit->getOperand(0);
- auto *RHS = Exit->getOperand(1);
- if (LHS != Phi && RHS != Phi)
+ auto *Op0 = Exit->getOperand(0);
+ auto *Op1 = Exit->getOperand(1);
+ if (Kind == RecurKind::FAdd && Op0 != Phi && Op1 != Phi)
+ return false;
+ if (Kind == RecurKind::FMulAdd && Exit->getOperand(2) != Phi)
return false;
LLVM_DEBUG(dbgs() << "LV: Found an ordered reduction: Phi: " << *Phi
@@ -389,6 +397,12 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
for (User *U : Cur->users()) {
Instruction *UI = cast<Instruction>(U);
+ // If the user is a call to llvm.fmuladd then Cur can only be used as the
+ // final (addend) operand.
+ if (isFMulAddIntrinsic(UI))
+ if (Cur == UI->getOperand(0) || Cur == UI->getOperand(1))
+ return false;
+
// Check if we found the exit user.
BasicBlock *Parent = UI->getParent();
if (!TheLoop->contains(Parent)) {
@@ -710,6 +724,9 @@ RecurrenceDescriptor::isRecurrenceInstr(Loop *L, PHINode *OrigPhi,
I->hasNoSignedZeros())) &&
isFPMinMaxRecurrenceKind(Kind)))
return isMinMaxPattern(I, Kind, Prev);
+ else if (isFMulAddIntrinsic(I))
+ return InstDesc(Kind == RecurKind::FMulAdd, I,
+ I->hasAllowReassoc() ? nullptr : I);
return InstDesc(false, I);
}
}
@@ -804,6 +821,11 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
<< " PHI." << *Phi << "\n");
return true;
}
+ if (AddReductionVar(Phi, RecurKind::FMulAdd, TheLoop, FMF, RedDes, DB, AC,
+ DT)) {
+ LLVM_DEBUG(dbgs() << "Found an FMulAdd reduction PHI." << *Phi << "\n");
+ return true;
+ }
// Not a reduction of known type.
return false;
}
@@ -927,6 +949,7 @@ Value *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp,
case RecurKind::FMul:
// Multiplying a number by 1 does not change it.
return ConstantFP::get(Tp, 1.0L);
+ case RecurKind::FMulAdd:
case RecurKind::FAdd:
// Adding zero to a number does not change it.
// FIXME: Ideally we should not need to check FMF for FAdd and should always
@@ -974,6 +997,7 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
return Instruction::Xor;
case RecurKind::FMul:
return Instruction::FMul;
+ case RecurKind::FMulAdd:
case RecurKind::FAdd:
return Instruction::FAdd;
case RecurKind::SMax:
@@ -1032,6 +1056,10 @@ RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const {
return SelectPatternResult::isMinOrMax(
matchSelectPattern(Cur, LHS, RHS).Flavor);
}
+ // Recognize a call to the llvm.fmuladd intrinsic.
+ if (isFMulAddIntrinsic(Cur))
+ return true;
+
return Cur->getOpcode() == RedOp;
};
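The operand-position rule added above is easy to state on its own: for llvm.fmuladd(a, b, c) = a*b + c, an in-order reduction may feed the PHI back only through the addend c (the final operand); if the PHI appears as one of the multiplied operands, the reduction is rejected. The following is a small self-contained sketch of just that rule, where FMulAddCall and phiFeedsOnlyAddend are hypothetical stand-ins rather than LLVM classes.

#include <cassert>

// Models a call to llvm.fmuladd(a, b, c): Ops[0] and Ops[1] are multiplied,
// Ops[2] is the addend.
struct FMulAddCall {
  const void *Ops[3];
};

// The PHI may only be the accumulator (final) operand, never a multiplicand.
bool phiFeedsOnlyAddend(const FMulAddCall &Call, const void *Phi) {
  if (Call.Ops[0] == Phi || Call.Ops[1] == Phi)
    return false;
  return Call.Ops[2] == Phi;
}

int main() {
  int Phi, A, B;
  FMulAddCall Good{{&A, &B, &Phi}}; // fmuladd(a, b, phi): accepted
  FMulAddCall Bad{{&Phi, &B, &A}};  // fmuladd(phi, b, a): rejected
  assert(phiFeedsOnlyAddend(Good, &Phi));
  assert(!phiFeedsOnlyAddend(Bad, &Phi));
  return 0;
}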
diff --git a/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp
index 864eeea4f8bf..22d2ce11cc90 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -2180,6 +2180,55 @@ Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
return ::SimplifyAndInst(Op0, Op1, Q, RecursionLimit);
}
+static Value *simplifyOrLogic(Value *X, Value *Y) {
+ assert(X->getType() == Y->getType() && "Expected same type for 'or' ops");
+ Type *Ty = X->getType();
+
+ // X | ~X --> -1
+ if (match(Y, m_Not(m_Specific(X))))
+ return ConstantInt::getAllOnesValue(Ty);
+
+ // X | ~(X & ?) = -1
+ if (match(Y, m_Not(m_c_And(m_Specific(X), m_Value()))))
+ return ConstantInt::getAllOnesValue(Ty);
+
+ // X | (X & ?) --> X
+ if (match(Y, m_c_And(m_Specific(X), m_Value())))
+ return X;
+
+ Value *A, *B;
+
+ // (A & ~B) | (A ^ B) --> A ^ B
+ // (~B & A) | (A ^ B) --> A ^ B
+ // (A & ~B) | (B ^ A) --> B ^ A
+ // (~B & A) | (B ^ A) --> B ^ A
+ if (match(X, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Y, m_c_Xor(m_Specific(A), m_Specific(B))))
+ return Y;
+
+ // (~A ^ B) | (A & B) --> ~A ^ B
+ // (B ^ ~A) | (A & B) --> B ^ ~A
+ // (~A ^ B) | (B & A) --> ~A ^ B
+ // (B ^ ~A) | (B & A) --> B ^ ~A
+ if (match(X, m_c_Xor(m_Not(m_Value(A)), m_Value(B))) &&
+ match(Y, m_c_And(m_Specific(A), m_Specific(B))))
+ return X;
+
+ // (A ^ B) | (A | B) --> A | B
+ // (A ^ B) | (B | A) --> B | A
+ if (match(X, m_Xor(m_Value(A), m_Value(B))) &&
+ match(Y, m_c_Or(m_Specific(A), m_Specific(B))))
+ return Y;
+
+ // ~(A ^ B) | (A | B) --> -1
+ // ~(A ^ B) | (B | A) --> -1
+ if (match(X, m_Not(m_Xor(m_Value(A), m_Value(B)))) &&
+ match(Y, m_c_Or(m_Specific(A), m_Specific(B))))
+ return ConstantInt::getAllOnesValue(Ty);
+
+ return nullptr;
+}
+
/// Given operands for an Or, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
@@ -2202,81 +2251,15 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
if (Op0 == Op1 || match(Op1, m_Zero()))
return Op0;
- // A | ~A = ~A | A = -1
- if (match(Op0, m_Not(m_Specific(Op1))) ||
- match(Op1, m_Not(m_Specific(Op0))))
- return Constant::getAllOnesValue(Op0->getType());
-
- // (A & ?) | A = A
- if (match(Op0, m_c_And(m_Specific(Op1), m_Value())))
- return Op1;
-
- // A | (A & ?) = A
- if (match(Op1, m_c_And(m_Specific(Op0), m_Value())))
- return Op0;
-
- // ~(A & ?) | A = -1
- if (match(Op0, m_Not(m_c_And(m_Specific(Op1), m_Value()))))
- return Constant::getAllOnesValue(Op1->getType());
-
- // A | ~(A & ?) = -1
- if (match(Op1, m_Not(m_c_And(m_Specific(Op0), m_Value()))))
- return Constant::getAllOnesValue(Op0->getType());
+ if (Value *R = simplifyOrLogic(Op0, Op1))
+ return R;
+ if (Value *R = simplifyOrLogic(Op1, Op0))
+ return R;
if (Value *V = simplifyLogicOfAddSub(Op0, Op1, Instruction::Or))
return V;
Value *A, *B, *NotA;
- // (A & ~B) | (A ^ B) -> (A ^ B)
- // (~B & A) | (A ^ B) -> (A ^ B)
- // (A & ~B) | (B ^ A) -> (B ^ A)
- // (~B & A) | (B ^ A) -> (B ^ A)
- if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
- (match(Op0, m_c_And(m_Specific(A), m_Not(m_Specific(B)))) ||
- match(Op0, m_c_And(m_Not(m_Specific(A)), m_Specific(B)))))
- return Op1;
-
- // Commute the 'or' operands.
- // (A ^ B) | (A & ~B) -> (A ^ B)
- // (A ^ B) | (~B & A) -> (A ^ B)
- // (B ^ A) | (A & ~B) -> (B ^ A)
- // (B ^ A) | (~B & A) -> (B ^ A)
- if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
- (match(Op1, m_c_And(m_Specific(A), m_Not(m_Specific(B)))) ||
- match(Op1, m_c_And(m_Not(m_Specific(A)), m_Specific(B)))))
- return Op0;
-
- // (A & B) | (~A ^ B) -> (~A ^ B)
- // (B & A) | (~A ^ B) -> (~A ^ B)
- // (A & B) | (B ^ ~A) -> (B ^ ~A)
- // (B & A) | (B ^ ~A) -> (B ^ ~A)
- if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
- (match(Op1, m_c_Xor(m_Specific(A), m_Not(m_Specific(B)))) ||
- match(Op1, m_c_Xor(m_Not(m_Specific(A)), m_Specific(B)))))
- return Op1;
-
- // Commute the 'or' operands.
- // (~A ^ B) | (A & B) -> (~A ^ B)
- // (~A ^ B) | (B & A) -> (~A ^ B)
- // (B ^ ~A) | (A & B) -> (B ^ ~A)
- // (B ^ ~A) | (B & A) -> (B ^ ~A)
- if (match(Op1, m_And(m_Value(A), m_Value(B))) &&
- (match(Op0, m_c_Xor(m_Specific(A), m_Not(m_Specific(B)))) ||
- match(Op0, m_c_Xor(m_Not(m_Specific(A)), m_Specific(B)))))
- return Op0;
-
- // (A | B) | (A ^ B) --> A | B
- // (B | A) | (A ^ B) --> B | A
- if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
- match(Op0, m_c_Or(m_Specific(A), m_Specific(B))))
- return Op0;
-
- // Commute the outer 'or' operands.
- // (A ^ B) | (A | B) --> A | B
- // (A ^ B) | (B | A) --> B | A
- if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
- match(Op1, m_c_Or(m_Specific(A), m_Specific(B))))
- return Op1;
// (~A & B) | ~(A | B) --> ~A
// (~A & B) | ~(B | A) --> ~A
@@ -2414,6 +2397,30 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
match(Op1, m_Not(m_Specific(Op0))))
return Constant::getAllOnesValue(Op0->getType());
+ auto foldAndOrNot = [](Value *X, Value *Y) -> Value * {
+ Value *A, *B;
+ // (~A & B) ^ (A | B) --> A -- There are 8 commuted variants.
+ if (match(X, m_c_And(m_Not(m_Value(A)), m_Value(B))) &&
+ match(Y, m_c_Or(m_Specific(A), m_Specific(B))))
+ return A;
+
+ // (~A | B) ^ (A & B) --> ~A -- There are 8 commuted variants.
+ // The 'not' op must contain a complete -1 operand (no undef elements for
+ // vector) for the transform to be safe.
+ Value *NotA;
+ if (match(X,
+ m_c_Or(m_CombineAnd(m_NotForbidUndef(m_Value(A)), m_Value(NotA)),
+ m_Value(B))) &&
+ match(Y, m_c_And(m_Specific(A), m_Specific(B))))
+ return NotA;
+
+ return nullptr;
+ };
+ if (Value *R = foldAndOrNot(Op0, Op1))
+ return R;
+ if (Value *R = foldAndOrNot(Op1, Op0))
+ return R;
+
if (Value *V = simplifyLogicOfAddSub(Op0, Op1, Instruction::Xor))
return V;
@@ -2935,8 +2942,10 @@ static Value *simplifyICmpWithBinOpOnLHS(
return getFalse(ITy);
}
- // x >> y <=u x
- // x udiv y <=u x.
+ // x >>u y <=u x --> true.
+ // x >>u y >u x --> false.
+ // x udiv y <=u x --> true.
+ // x udiv y >u x --> false.
if (match(LBO, m_LShr(m_Specific(RHS), m_Value())) ||
match(LBO, m_UDiv(m_Specific(RHS), m_Value()))) {
// icmp pred (X op Y), X
@@ -2946,6 +2955,37 @@ static Value *simplifyICmpWithBinOpOnLHS(
return getTrue(ITy);
}
+ // If x is nonzero:
+ // x >>u C <u x --> true for C != 0.
+ // x >>u C != x --> true for C != 0.
+ // x >>u C >=u x --> false for C != 0.
+ // x >>u C == x --> false for C != 0.
+ // x udiv C <u x --> true for C != 1.
+ // x udiv C != x --> true for C != 1.
+ // x udiv C >=u x --> false for C != 1.
+ // x udiv C == x --> false for C != 1.
+ // TODO: allow non-constant shift amount/divisor
+ const APInt *C;
+ if ((match(LBO, m_LShr(m_Specific(RHS), m_APInt(C))) && *C != 0) ||
+ (match(LBO, m_UDiv(m_Specific(RHS), m_APInt(C))) && *C != 1)) {
+ if (isKnownNonZero(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) {
+ switch (Pred) {
+ default:
+ break;
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_UGE:
+ return getFalse(ITy);
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_ULT:
+ return getTrue(ITy);
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_ULE:
+ // UGT/ULE are handled by the more general case just above
+ llvm_unreachable("Unexpected UGT/ULE, should have been handled");
+ }
+ }
+ }
+
// (x*C1)/C2 <= x for C1 <= C2.
// This holds even if the multiplication overflows: Assume that x != 0 and
// arithmetic is modulo M. For overflow to occur we must have C1 >= M/x and
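The new simplifyOrLogic and foldAndOrNot folds rest on bitwise identities that can be checked mechanically. As a sanity check, the following brute-force verification over all 8-bit operand pairs, written as plain standalone C++ rather than against LLVM's pattern matchers, covers three of them.

#include <cassert>
#include <cstdint>

// Exhaustively verifies:
//   (A & ~B) | (A ^ B) == A ^ B
//   (~A & B) ^ (A | B) == A
//   (~A | B) ^ (A & B) == ~A
int main() {
  for (unsigned I = 0; I < 256; ++I)
    for (unsigned J = 0; J < 256; ++J) {
      uint8_t A = I, B = J;
      assert(uint8_t((A & ~B) | (A ^ B)) == uint8_t(A ^ B));
      assert(uint8_t((~A & B) ^ (A | B)) == uint8_t(A));
      assert(uint8_t((~A | B) ^ (A & B)) == uint8_t(~A));
    }
  return 0;
}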
diff --git a/contrib/llvm-project/llvm/lib/Analysis/IntervalPartition.cpp b/contrib/llvm-project/llvm/lib/Analysis/IntervalPartition.cpp
index 23ff4fd6f85e..d9620fd405bc 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/IntervalPartition.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/IntervalPartition.cpp
@@ -36,16 +36,16 @@ INITIALIZE_PASS(IntervalPartition, "intervals",
// releaseMemory - Reset state back to before function was analyzed
void IntervalPartition::releaseMemory() {
- for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
- delete Intervals[i];
+ for (Interval *I : Intervals)
+ delete I;
IntervalMap.clear();
Intervals.clear();
RootInterval = nullptr;
}
void IntervalPartition::print(raw_ostream &O, const Module*) const {
- for(unsigned i = 0, e = Intervals.size(); i != e; ++i)
- Intervals[i]->print(O);
+ for (const Interval *I : Intervals)
+ I->print(O);
}
// addIntervalToPartition - Add an interval to the internal list of intervals,
@@ -87,8 +87,8 @@ bool IntervalPartition::runOnFunction(Function &F) {
// Now that we know all of the successor information, propagate this to the
// predecessors for each block.
- for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
- updatePredecessors(Intervals[i]);
+ for (Interval *I : Intervals)
+ updatePredecessors(I);
return false;
}
@@ -113,6 +113,6 @@ IntervalPartition::IntervalPartition(IntervalPartition &IP, bool)
// Now that we know all of the successor information, propagate this to the
// predecessors for each block.
- for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
- updatePredecessors(Intervals[i]);
+ for (Interval *I : Intervals)
+ updatePredecessors(I);
}
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp
index 50fa169c2081..5b5d48bf6fe5 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -1095,7 +1095,8 @@ static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI,
if (!Ty->isIntegerTy())
return ValueLatticeElement::getOverdefined();
- APInt Offset(Ty->getScalarSizeInBits(), 0);
+ unsigned BitWidth = Ty->getScalarSizeInBits();
+ APInt Offset(BitWidth, 0);
if (matchICmpOperand(Offset, LHS, Val, EdgePred))
return getValueFromSimpleICmpCondition(EdgePred, RHS, Offset);
@@ -1118,13 +1119,23 @@ static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI,
// If (Val & Mask) != 0 then the value must be larger than the lowest set
// bit of Mask.
if (EdgePred == ICmpInst::ICMP_NE && !Mask->isZero() && C->isZero()) {
- unsigned BitWidth = Ty->getIntegerBitWidth();
return ValueLatticeElement::getRange(ConstantRange::getNonEmpty(
APInt::getOneBitSet(BitWidth, Mask->countTrailingZeros()),
APInt::getZero(BitWidth)));
}
}
+ // If (X urem Modulus) >= C, then X >= C.
+ // TODO: An upper bound could be computed as well.
+ if (match(LHS, m_URem(m_Specific(Val), m_Value())) &&
+ match(RHS, m_APInt(C))) {
+ // Use the icmp region so we don't have to deal with different predicates.
+ ConstantRange CR = ConstantRange::makeExactICmpRegion(EdgePred, *C);
+ if (!CR.isEmptySet())
+ return ValueLatticeElement::getRange(ConstantRange::getNonEmpty(
+ CR.getUnsignedMin(), APInt(BitWidth, 0)));
+ }
+
return ValueLatticeElement::getOverdefined();
}
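The justification for the new urem rule is that X urem M never exceeds X for unsigned X: if X < M the remainder equals X, and otherwise the remainder is smaller than M, which is at most X. Hence any lower bound established for (X urem M) also holds for X, which is exactly the range returned above. A quick standalone check of that fact (ordinary C++, not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  // X % M <= X for all unsigned X and nonzero M, so a lower bound on the
  // remainder is also a lower bound on X.
  for (uint32_t X = 0; X < 4096; ++X)
    for (uint32_t M = 1; M < 64; ++M)
      assert(X % M <= X);
  return 0;
}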
diff --git a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index f9bd7167317f..19a24ac6a484 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -666,6 +666,29 @@ static bool isNoWrap(PredicatedScalarEvolution &PSE,
return false;
}
+static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
+ function_ref<void(Value *)> AddPointer) {
+ SmallPtrSet<Value *, 8> Visited;
+ SmallVector<Value *> WorkList;
+ WorkList.push_back(StartPtr);
+
+ while (!WorkList.empty()) {
+ Value *Ptr = WorkList.pop_back_val();
+ if (!Visited.insert(Ptr).second)
+ continue;
+ auto *PN = dyn_cast<PHINode>(Ptr);
+ // SCEV does not look through non-header PHIs inside the loop. Such phis
+ // can be analyzed by adding separate accesses for each incoming pointer
+ // value.
+ if (PN && InnermostLoop.contains(PN->getParent()) &&
+ PN->getParent() != InnermostLoop.getHeader()) {
+ for (const Use &Inc : PN->incoming_values())
+ WorkList.push_back(Inc);
+ } else
+ AddPointer(Ptr);
+ }
+}
+
bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
MemAccessInfo Access,
const ValueToValueMap &StridesMap,
@@ -1032,13 +1055,11 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy,
bool ShouldCheckWrap) {
Type *Ty = Ptr->getType();
assert(Ty->isPointerTy() && "Unexpected non-ptr");
- unsigned AddrSpace = Ty->getPointerAddressSpace();
+ assert(!AccessTy->isAggregateType() && "Bad stride - Not a pointer to a scalar type");
- // Make sure we're not accessing an aggregate type.
- // TODO: Why? This doesn't make any sense.
- if (AccessTy->isAggregateType()) {
- LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
- << *Ptr << "\n");
+ if (isa<ScalableVectorType>(AccessTy)) {
+ LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy
+ << "\n");
return 0;
}
@@ -1068,6 +1089,7 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy,
// An getelementptr without an inbounds attribute and unit stride would have
// to access the pointer value "0" which is undefined behavior in address
// space 0, therefore we can also vectorize this case.
+ unsigned AddrSpace = Ty->getPointerAddressSpace();
bool IsInBoundsGEP = isInBoundsGep(Ptr);
bool IsNoWrapAddRec = !ShouldCheckWrap ||
PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
@@ -1101,7 +1123,8 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy,
}
auto &DL = Lp->getHeader()->getModule()->getDataLayout();
- int64_t Size = DL.getTypeAllocSize(AccessTy);
+ TypeSize AllocSize = DL.getTypeAllocSize(AccessTy);
+ int64_t Size = AllocSize.getFixedSize();
const APInt &APStepVal = C->getAPInt();
// Huge step value - give up.
@@ -1263,29 +1286,6 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
return Diff && *Diff == 1;
}
-static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
- function_ref<void(Value *)> AddPointer) {
- SmallPtrSet<Value *, 8> Visited;
- SmallVector<Value *> WorkList;
- WorkList.push_back(StartPtr);
-
- while (!WorkList.empty()) {
- Value *Ptr = WorkList.pop_back_val();
- if (!Visited.insert(Ptr).second)
- continue;
- auto *PN = dyn_cast<PHINode>(Ptr);
- // SCEV does not look through non-header PHIs inside the loop. Such phis
- // can be analyzed by adding separate accesses for each incoming pointer
- // value.
- if (PN && InnermostLoop.contains(PN->getParent()) &&
- PN->getParent() != InnermostLoop.getHeader()) {
- for (const Use &Inc : PN->incoming_values())
- WorkList.push_back(Inc);
- } else
- AddPointer(Ptr);
- }
-}
-
void MemoryDepChecker::addAccess(StoreInst *SI) {
visitPointers(SI->getPointerOperand(), *InnermostLoop,
[this, SI](Value *Ptr) {
diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index b44d15e71556..da6bb4c49cba 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -1481,11 +1481,11 @@ void MemoryDependenceResults::removeCachedNonLocalPointerDependencies(
// instructions from the reverse map.
NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
- for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
- Instruction *Target = PInfo[i].getResult().getInst();
+ for (const NonLocalDepEntry &DE : PInfo) {
+ Instruction *Target = DE.getResult().getInst();
if (!Target)
continue; // Ignore non-local dep results.
- assert(Target->getParent() == PInfo[i].getBB());
+ assert(Target->getParent() == DE.getBB());
// Eliminating the dirty entry from 'Cache', so update the reverse info.
RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemoryLocation.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemoryLocation.cpp
index 7f2d04c49565..854ba83bd34a 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/MemoryLocation.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/MemoryLocation.cpp
@@ -213,6 +213,28 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,
LibFunc F;
if (TLI && TLI->getLibFunc(*Call, F) && TLI->has(F)) {
switch (F) {
+ case LibFunc_memset_chk: {
+ assert(ArgIdx == 0 && "Invalid argument index for memset_chk");
+ LocationSize Size = LocationSize::afterPointer();
+ if (const auto *Len = dyn_cast<ConstantInt>(Call->getArgOperand(2))) {
+ // memset_chk writes at most Len bytes. It may write fewer if Len exceeds
+ // the specified max size, in which case it aborts.
+ Size = LocationSize::upperBound(Len->getZExtValue());
+ }
+ return MemoryLocation(Arg, Size, AATags);
+ }
+ case LibFunc_strncpy: {
+ assert((ArgIdx == 0 || ArgIdx == 1) &&
+ "Invalid argument index for strncpy");
+ LocationSize Size = LocationSize::afterPointer();
+ if (const auto *Len = dyn_cast<ConstantInt>(Call->getArgOperand(2))) {
+ // strncpy is guaranteed to write Len bytes, but only reads up to Len
+ // bytes.
+ Size = ArgIdx == 0 ? LocationSize::precise(Len->getZExtValue())
+ : LocationSize::upperBound(Len->getZExtValue());
+ }
+ return MemoryLocation(Arg, Size, AATags);
+ }
case LibFunc_memset_pattern16:
assert((ArgIdx == 0 || ArgIdx == 1) &&
"Invalid argument index for memset_pattern16");
diff --git a/contrib/llvm-project/llvm/lib/Analysis/PHITransAddr.cpp b/contrib/llvm-project/llvm/lib/Analysis/PHITransAddr.cpp
index c73e1fd82915..4c80f6743411 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/PHITransAddr.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/PHITransAddr.cpp
@@ -69,7 +69,7 @@ static bool VerifySubExpr(Value *Expr,
}
// If it isn't in the InstInputs list it is a subexpr incorporated into the
- // address. Sanity check that it is phi translatable.
+ // address. Validate that it is phi translatable.
if (!CanPHITrans(I)) {
errs() << "Instruction in PHITransAddr is not phi-translatable:\n";
errs() << *I << '\n';
diff --git a/contrib/llvm-project/llvm/lib/Analysis/RegionPass.cpp b/contrib/llvm-project/llvm/lib/Analysis/RegionPass.cpp
index a73607dbef61..c20ecff5f912 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/RegionPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/RegionPass.cpp
@@ -15,6 +15,7 @@
#include "llvm/Analysis/RegionPass.h"
#include "llvm/IR/OptBisect.h"
#include "llvm/IR/PassTimingInfo.h"
+#include "llvm/IR/PrintPasses.h"
#include "llvm/IR/StructuralHash.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
@@ -187,6 +188,8 @@ public:
}
bool runOnRegion(Region *R, RGPassManager &RGM) override {
+ if (!isFunctionInPrintList(R->getEntry()->getParent()->getName()))
+ return false;
Out << Banner;
for (const auto *BB : R->blocks()) {
if (BB)
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp
index f7c22cfb0310..7dc7f9904c70 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -2915,8 +2915,8 @@ ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops,
const Loop *L, SCEV::NoWrapFlags Flags) {
FoldingSetNodeID ID;
ID.AddInteger(scAddRecExpr);
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- ID.AddPointer(Ops[i]);
+ for (const SCEV *Op : Ops)
+ ID.AddPointer(Op);
ID.AddPointer(L);
void *IP = nullptr;
SCEVAddRecExpr *S =
@@ -2939,8 +2939,8 @@ ScalarEvolution::getOrCreateMulExpr(ArrayRef<const SCEV *> Ops,
SCEV::NoWrapFlags Flags) {
FoldingSetNodeID ID;
ID.AddInteger(scMulExpr);
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- ID.AddPointer(Ops[i]);
+ for (const SCEV *Op : Ops)
+ ID.AddPointer(Op);
void *IP = nullptr;
SCEVMulExpr *S =
static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
@@ -3708,8 +3708,8 @@ SCEV *ScalarEvolution::findExistingSCEVInCache(SCEVTypes SCEVType,
ArrayRef<const SCEV *> Ops) {
FoldingSetNodeID ID;
ID.AddInteger(SCEVType);
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- ID.AddPointer(Ops[i]);
+ for (const SCEV *Op : Ops)
+ ID.AddPointer(Op);
void *IP = nullptr;
return UniqueSCEVs.FindNodeOrInsertPos(ID, IP);
}
@@ -4094,6 +4094,17 @@ void ScalarEvolution::eraseValueFromMap(Value *V) {
}
}
+void ScalarEvolution::insertValueToMap(Value *V, const SCEV *S) {
+ // A recursive query may have already computed the SCEV. It should be
+ // equivalent, but may not necessarily be exactly the same, e.g. due to lazily
+ // inferred nowrap flags.
+ auto It = ValueExprMap.find_as(V);
+ if (It == ValueExprMap.end()) {
+ ValueExprMap.insert({SCEVCallbackVH(V, this), S});
+ ExprValueMap[S].insert({V, nullptr});
+ }
+}
+
/// Return an existing SCEV if it exists, otherwise analyze the expression and
/// create a new one.
const SCEV *ScalarEvolution::getSCEV(Value *V) {
@@ -4134,10 +4145,9 @@ const SCEV *ScalarEvolution::getExistingSCEV(Value *V) {
ValueExprMapType::iterator I = ValueExprMap.find_as(V);
if (I != ValueExprMap.end()) {
const SCEV *S = I->second;
- if (checkValidity(S))
- return S;
- eraseValueFromMap(V);
- forgetMemoizedResults(S);
+ assert(checkValidity(S) &&
+ "existing SCEV has not been properly invalidated");
+ return S;
}
return nullptr;
}
@@ -4430,44 +4440,6 @@ static void PushDefUseChildren(Instruction *I,
}
}
-void ScalarEvolution::forgetSymbolicName(Instruction *PN, const SCEV *SymName) {
- SmallVector<Instruction *, 16> Worklist;
- SmallPtrSet<Instruction *, 8> Visited;
- SmallVector<const SCEV *, 8> ToForget;
- Visited.insert(PN);
- Worklist.push_back(PN);
- while (!Worklist.empty()) {
- Instruction *I = Worklist.pop_back_val();
-
- auto It = ValueExprMap.find_as(static_cast<Value *>(I));
- if (It != ValueExprMap.end()) {
- const SCEV *Old = It->second;
-
- // Short-circuit the def-use traversal if the symbolic name
- // ceases to appear in expressions.
- if (Old != SymName && !hasOperand(Old, SymName))
- continue;
-
- // SCEVUnknown for a PHI either means that it has an unrecognized
- // structure, it's a PHI that's in the progress of being computed
- // by createNodeForPHI, or it's a single-value PHI. In the first case,
- // additional loop trip count information isn't going to change anything.
- // In the second case, createNodeForPHI will perform the necessary
- // updates on its own when it gets to that point. In the third, we do
- // want to forget the SCEVUnknown.
- if (!isa<PHINode>(I) ||
- !isa<SCEVUnknown>(Old) ||
- (I != PN && Old == SymName)) {
- eraseValueFromMap(It->first);
- ToForget.push_back(Old);
- }
- }
-
- PushDefUseChildren(I, Worklist, Visited);
- }
- forgetMemoizedResults(ToForget);
-}
-
namespace {
/// Takes SCEV S and Loop L. For each AddRec sub-expression, use its start
@@ -5335,15 +5307,17 @@ const SCEV *ScalarEvolution::createSimpleAffineAddRec(PHINode *PN,
const SCEV *StartVal = getSCEV(StartValueV);
const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
-
- ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
+ insertValueToMap(PN, PHISCEV);
// We can add Flags to the post-inc expression only if we
// know that it is *undefined behavior* for BEValueV to
// overflow.
- if (auto *BEInst = dyn_cast<Instruction>(BEValueV))
- if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L))
+ if (auto *BEInst = dyn_cast<Instruction>(BEValueV)) {
+ assert(isLoopInvariant(Accum, L) &&
+ "Accum is defined outside L, but is not invariant?");
+ if (isAddRecNeverPoison(BEInst, L))
(void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);
+ }
return PHISCEV;
}
@@ -5386,7 +5360,7 @@ const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
// Handle PHI node value symbolically.
const SCEV *SymbolicName = getUnknown(PN);
- ValueExprMap.insert({SCEVCallbackVH(PN, this), SymbolicName});
+ insertValueToMap(PN, SymbolicName);
// Using this symbolic name for the PHI, analyze the value coming around
// the back-edge.
@@ -5457,8 +5431,8 @@ const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
// Okay, for the entire analysis of this edge we assumed the PHI
// to be symbolic. We now need to go back and purge all of the
// entries for the scalars that use the symbolic expression.
- forgetSymbolicName(PN, SymbolicName);
- ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
+ forgetMemoizedResults(SymbolicName);
+ insertValueToMap(PN, PHISCEV);
// We can add Flags to the post-inc expression only if we
// know that it is *undefined behavior* for BEValueV to
@@ -5489,8 +5463,8 @@ const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
// Okay, for the entire analysis of this edge we assumed the PHI
// to be symbolic. We now need to go back and purge all of the
// entries for the scalars that use the symbolic expression.
- forgetSymbolicName(PN, SymbolicName);
- ValueExprMap[SCEVCallbackVH(PN, this)] = Shifted;
+ forgetMemoizedResults(SymbolicName);
+ insertValueToMap(PN, Shifted);
return Shifted;
}
}
@@ -7598,62 +7572,19 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// Now that we know more about the trip count for this loop, forget any
// existing SCEV values for PHI nodes in this loop since they are only
// conservative estimates made without the benefit of trip count
- // information. This is similar to the code in forgetLoop, except that
- // it handles SCEVUnknown PHI nodes specially.
+ // information. This invalidation is not necessary for correctness, and is
+ // only done to produce more precise results.
if (Result.hasAnyInfo()) {
- SmallVector<Instruction *, 16> Worklist;
- SmallPtrSet<Instruction *, 8> Discovered;
+ // Invalidate any expression using an addrec in this loop.
SmallVector<const SCEV *, 8> ToForget;
- PushLoopPHIs(L, Worklist, Discovered);
- while (!Worklist.empty()) {
- Instruction *I = Worklist.pop_back_val();
-
- ValueExprMapType::iterator It =
- ValueExprMap.find_as(static_cast<Value *>(I));
- if (It != ValueExprMap.end()) {
- const SCEV *Old = It->second;
-
- // SCEVUnknown for a PHI either means that it has an unrecognized
- // structure, or it's a PHI that's in the progress of being computed
- // by createNodeForPHI. In the former case, additional loop trip
- // count information isn't going to change anything. In the later
- // case, createNodeForPHI will perform the necessary updates on its
- // own when it gets to that point.
- if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) {
- eraseValueFromMap(It->first);
- ToForget.push_back(Old);
- }
- if (PHINode *PN = dyn_cast<PHINode>(I))
- ConstantEvolutionLoopExitValue.erase(PN);
- }
-
- // Since we don't need to invalidate anything for correctness and we're
- // only invalidating to make SCEV's results more precise, we get to stop
- // early to avoid invalidating too much. This is especially important in
- // cases like:
- //
- // %v = f(pn0, pn1) // pn0 and pn1 used through some other phi node
- // loop0:
- // %pn0 = phi
- // ...
- // loop1:
- // %pn1 = phi
- // ...
- //
- // where both loop0 and loop1's backedge taken count uses the SCEV
- // expression for %v. If we don't have the early stop below then in cases
- // like the above, getBackedgeTakenInfo(loop1) will clear out the trip
- // count for loop0 and getBackedgeTakenInfo(loop0) will clear out the trip
- // count for loop1, effectively nullifying SCEV's trip count cache.
- for (auto *U : I->users())
- if (auto *I = dyn_cast<Instruction>(U)) {
- auto *LoopForUser = LI.getLoopFor(I->getParent());
- if (LoopForUser && L->contains(LoopForUser) &&
- Discovered.insert(I).second)
- Worklist.push_back(I);
- }
- }
+ auto LoopUsersIt = LoopUsers.find(L);
+ if (LoopUsersIt != LoopUsers.end())
+ append_range(ToForget, LoopUsersIt->second);
forgetMemoizedResults(ToForget);
+
+ // Invalidate constant-evolved loop header phis.
+ for (PHINode &PN : L->getHeader()->phis())
+ ConstantEvolutionLoopExitValue.erase(&PN);
}
// Re-lookup the insert position, since the call to
@@ -7672,10 +7603,12 @@ void ScalarEvolution::forgetAllLoops() {
// result.
BackedgeTakenCounts.clear();
PredicatedBackedgeTakenCounts.clear();
+ BECountUsers.clear();
LoopPropertiesCache.clear();
ConstantEvolutionLoopExitValue.clear();
ValueExprMap.clear();
ValuesAtScopes.clear();
+ ValuesAtScopesUsers.clear();
LoopDispositions.clear();
BlockDispositions.clear();
UnsignedRanges.clear();
@@ -7697,8 +7630,8 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
auto *CurrL = LoopWorklist.pop_back_val();
// Drop any stored trip count value.
- BackedgeTakenCounts.erase(CurrL);
- PredicatedBackedgeTakenCounts.erase(CurrL);
+ forgetBackedgeTakenCounts(CurrL, /* Predicated */ false);
+ forgetBackedgeTakenCounts(CurrL, /* Predicated */ true);
// Drop information about predicated SCEV rewrites for this loop.
for (auto I = PredicatedSCEVRewrites.begin();
@@ -7872,10 +7805,6 @@ bool ScalarEvolution::BackedgeTakenInfo::isConstantMaxOrZero(
return MaxOrZero && !any_of(ExitNotTaken, PredicateNotAlwaysTrue);
}
-bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S) const {
- return Operands.contains(S);
-}
-
ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E)
: ExitLimit(E, E, false, None) {
}
@@ -7916,19 +7845,6 @@ ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E, const SCEV *M,
: ExitLimit(E, M, MaxOrZero, None) {
}
-class SCEVRecordOperands {
- SmallPtrSetImpl<const SCEV *> &Operands;
-
-public:
- SCEVRecordOperands(SmallPtrSetImpl<const SCEV *> &Operands)
- : Operands(Operands) {}
- bool follow(const SCEV *S) {
- Operands.insert(S);
- return true;
- }
- bool isDone() { return false; }
-};
-
/// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
/// computable exit into a persistent ExitNotTakenInfo array.
ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
@@ -7957,14 +7873,6 @@ ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
assert((isa<SCEVCouldNotCompute>(ConstantMax) ||
isa<SCEVConstant>(ConstantMax)) &&
"No point in having a non-constant max backedge taken count!");
-
- SCEVRecordOperands RecordOperands(Operands);
- SCEVTraversal<SCEVRecordOperands> ST(RecordOperands);
- if (!isa<SCEVCouldNotCompute>(ConstantMax))
- ST.visitAll(ConstantMax);
- for (auto &ENT : ExitNotTaken)
- if (!isa<SCEVCouldNotCompute>(ENT.ExactNotTaken))
- ST.visitAll(ENT.ExactNotTaken);
}
/// Compute the number of times the backedge of the specified loop will execute.
@@ -8046,6 +7954,13 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L,
// The loop backedge will be taken the maximum or zero times if there's
// a single exit that must be taken the maximum or zero times.
bool MaxOrZero = (MustExitMaxOrZero && ExitingBlocks.size() == 1);
+
+ // Remember which SCEVs are used in exit limits for invalidation purposes.
+ // We only care about non-constant SCEVs here, so we can ignore EL.MaxNotTaken
+ // and MaxBECount, which must be SCEVConstant.
+ for (const auto &Pair : ExitCounts)
+ if (!isa<SCEVConstant>(Pair.second.ExactNotTaken))
+ BECountUsers[Pair.second.ExactNotTaken].insert({L, AllowPredicates});
return BackedgeTakenInfo(std::move(ExitCounts), CouldComputeBECount,
MaxBECount, MaxOrZero);
}
@@ -8916,6 +8831,9 @@ const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
LS.second = C;
break;
}
+
+ if (!isa<SCEVConstant>(C))
+ ValuesAtScopesUsers[C].push_back({L, V});
return C;
}
@@ -12387,7 +12305,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range,
if (Range.contains(Val->getValue()))
return SE.getCouldNotCompute(); // Something strange happened
- // Ensure that the previous value is in the range. This is a sanity check.
+ // Ensure that the previous value is in the range.
assert(Range.contains(
EvaluateConstantChrecAtConstant(this,
ConstantInt::get(SE.getContext(), ExitVal - 1), SE)->getValue()) &&
@@ -12531,9 +12449,11 @@ ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)),
PredicatedBackedgeTakenCounts(
std::move(Arg.PredicatedBackedgeTakenCounts)),
+ BECountUsers(std::move(Arg.BECountUsers)),
ConstantEvolutionLoopExitValue(
std::move(Arg.ConstantEvolutionLoopExitValue)),
ValuesAtScopes(std::move(Arg.ValuesAtScopes)),
+ ValuesAtScopesUsers(std::move(Arg.ValuesAtScopesUsers)),
LoopDispositions(std::move(Arg.LoopDispositions)),
LoopPropertiesCache(std::move(Arg.LoopPropertiesCache)),
BlockDispositions(std::move(Arg.BlockDispositions)),
@@ -12946,6 +12866,23 @@ bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
return SCEVExprContains(S, [&](const SCEV *Expr) { return Expr == Op; });
}
+void ScalarEvolution::forgetBackedgeTakenCounts(const Loop *L,
+ bool Predicated) {
+ auto &BECounts =
+ Predicated ? PredicatedBackedgeTakenCounts : BackedgeTakenCounts;
+ auto It = BECounts.find(L);
+ if (It != BECounts.end()) {
+ for (const ExitNotTakenInfo &ENT : It->second.ExitNotTaken) {
+ if (!isa<SCEVConstant>(ENT.ExactNotTaken)) {
+ auto UserIt = BECountUsers.find(ENT.ExactNotTaken);
+ assert(UserIt != BECountUsers.end());
+ UserIt->second.erase({L, Predicated});
+ }
+ }
+ BECounts.erase(It);
+ }
+}
+
void ScalarEvolution::forgetMemoizedResults(ArrayRef<const SCEV *> SCEVs) {
SmallPtrSet<const SCEV *, 8> ToForget(SCEVs.begin(), SCEVs.end());
SmallVector<const SCEV *, 8> Worklist(ToForget.begin(), ToForget.end());
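
The new BECountUsers and ValuesAtScopesUsers maps above are reverse indexes: instead of scanning every cached backedge-taken count when a SCEV is forgotten, invalidation only visits the entries registered as users of that SCEV. A simplified, self-contained model of that bookkeeping pattern (the names and types here are illustrative, not the actual ScalarEvolution members):

#include <map>
#include <set>
#include <string>

// Results caches a value per key; Users records which keys depend on each
// cached value, so forgetting a value erases exactly the dependent entries
// instead of scanning the whole cache.
struct Cache {
  std::map<std::string, int> Results;
  std::map<int, std::set<std::string>> Users;

  void remember(const std::string &Key, int Value) {
    Results[Key] = Value;
    Users[Value].insert(Key);
  }

  void forget(int Value) {
    auto It = Users.find(Value);
    if (It == Users.end())
      return;
    for (const std::string &Key : It->second)
      Results.erase(Key);
    Users.erase(It);
  }
};

int main() {
  Cache C;
  C.remember("trip-count(loop1)", 42);
  C.forget(42); // drops "trip-count(loop1)" without touching other entries
}
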
@@ -12970,32 +12907,52 @@ void ScalarEvolution::forgetMemoizedResults(ArrayRef<const SCEV *> SCEVs) {
else
++I;
}
-
- auto RemoveSCEVFromBackedgeMap = [&ToForget](
- DenseMap<const Loop *, BackedgeTakenInfo> &Map) {
- for (auto I = Map.begin(), E = Map.end(); I != E;) {
- BackedgeTakenInfo &BEInfo = I->second;
- if (any_of(ToForget,
- [&BEInfo](const SCEV *S) { return BEInfo.hasOperand(S); }))
- Map.erase(I++);
- else
- ++I;
- }
- };
-
- RemoveSCEVFromBackedgeMap(BackedgeTakenCounts);
- RemoveSCEVFromBackedgeMap(PredicatedBackedgeTakenCounts);
}
void ScalarEvolution::forgetMemoizedResultsImpl(const SCEV *S) {
- ValuesAtScopes.erase(S);
LoopDispositions.erase(S);
BlockDispositions.erase(S);
UnsignedRanges.erase(S);
SignedRanges.erase(S);
- ExprValueMap.erase(S);
HasRecMap.erase(S);
MinTrailingZerosCache.erase(S);
+
+ auto ExprIt = ExprValueMap.find(S);
+ if (ExprIt != ExprValueMap.end()) {
+ for (auto &ValueAndOffset : ExprIt->second) {
+ if (ValueAndOffset.second == nullptr) {
+ auto ValueIt = ValueExprMap.find_as(ValueAndOffset.first);
+ if (ValueIt != ValueExprMap.end())
+ ValueExprMap.erase(ValueIt);
+ }
+ }
+ ExprValueMap.erase(ExprIt);
+ }
+
+ auto ScopeIt = ValuesAtScopes.find(S);
+ if (ScopeIt != ValuesAtScopes.end()) {
+ for (const auto &Pair : ScopeIt->second)
+ if (!isa_and_nonnull<SCEVConstant>(Pair.second))
+ erase_value(ValuesAtScopesUsers[Pair.second],
+ std::make_pair(Pair.first, S));
+ ValuesAtScopes.erase(ScopeIt);
+ }
+
+ auto ScopeUserIt = ValuesAtScopesUsers.find(S);
+ if (ScopeUserIt != ValuesAtScopesUsers.end()) {
+ for (const auto &Pair : ScopeUserIt->second)
+ erase_value(ValuesAtScopes[Pair.second], std::make_pair(Pair.first, S));
+ ValuesAtScopesUsers.erase(ScopeUserIt);
+ }
+
+ auto BEUsersIt = BECountUsers.find(S);
+ if (BEUsersIt != BECountUsers.end()) {
+ // Work on a copy, as forgetBackedgeTakenCounts() will modify the original.
+ auto Copy = BEUsersIt->second;
+ for (const auto &Pair : Copy)
+ forgetBackedgeTakenCounts(Pair.getPointer(), Pair.getInt());
+ BECountUsers.erase(BEUsersIt);
+ }
}
void
@@ -13100,16 +13057,43 @@ void ScalarEvolution::verify() const {
ValidLoops.insert(L);
Worklist.append(L->begin(), L->end());
}
- // Check for SCEV expressions referencing invalid/deleted loops.
for (auto &KV : ValueExprMap) {
- auto *AR = dyn_cast<SCEVAddRecExpr>(KV.second);
- if (!AR)
- continue;
- assert(ValidLoops.contains(AR->getLoop()) &&
- "AddRec references invalid loop");
+ // Check for SCEV expressions referencing invalid/deleted loops.
+ if (auto *AR = dyn_cast<SCEVAddRecExpr>(KV.second)) {
+ assert(ValidLoops.contains(AR->getLoop()) &&
+ "AddRec references invalid loop");
+ }
+
+ // Check that the value is also part of the reverse map.
+ auto It = ExprValueMap.find(KV.second);
+ if (It == ExprValueMap.end() || !It->second.contains({KV.first, nullptr})) {
+ dbgs() << "Value " << *KV.first
+ << " is in ValueExprMap but not in ExprValueMap\n";
+ std::abort();
+ }
+ }
+
+ for (const auto &KV : ExprValueMap) {
+ for (const auto &ValueAndOffset : KV.second) {
+ if (ValueAndOffset.second != nullptr)
+ continue;
+
+ auto It = ValueExprMap.find_as(ValueAndOffset.first);
+ if (It == ValueExprMap.end()) {
+ dbgs() << "Value " << *ValueAndOffset.first
+ << " is in ExprValueMap but not in ValueExprMap\n";
+ std::abort();
+ }
+ if (It->second != KV.first) {
+ dbgs() << "Value " << *ValueAndOffset.first
+ << " mapped to " << *It->second
+ << " rather than " << *KV.first << "\n";
+ std::abort();
+ }
+ }
}
- // Verify intergity of SCEV users.
+ // Verify integrity of SCEV users.
for (const auto &S : UniqueSCEVs) {
SmallVector<const SCEV *, 4> Ops;
collectUniqueOps(&S, Ops);
@@ -13125,6 +13109,61 @@ void ScalarEvolution::verify() const {
std::abort();
}
}
+
+ // Verify integrity of ValuesAtScopes users.
+ for (const auto &ValueAndVec : ValuesAtScopes) {
+ const SCEV *Value = ValueAndVec.first;
+ for (const auto &LoopAndValueAtScope : ValueAndVec.second) {
+ const Loop *L = LoopAndValueAtScope.first;
+ const SCEV *ValueAtScope = LoopAndValueAtScope.second;
+ if (!isa<SCEVConstant>(ValueAtScope)) {
+ auto It = ValuesAtScopesUsers.find(ValueAtScope);
+ if (It != ValuesAtScopesUsers.end() &&
+ is_contained(It->second, std::make_pair(L, Value)))
+ continue;
+ dbgs() << "Value: " << *Value << ", Loop: " << *L << ", ValueAtScope: "
+ << *ValueAtScope << " missing in ValuesAtScopesUsers\n";
+ std::abort();
+ }
+ }
+ }
+
+ for (const auto &ValueAtScopeAndVec : ValuesAtScopesUsers) {
+ const SCEV *ValueAtScope = ValueAtScopeAndVec.first;
+ for (const auto &LoopAndValue : ValueAtScopeAndVec.second) {
+ const Loop *L = LoopAndValue.first;
+ const SCEV *Value = LoopAndValue.second;
+ assert(!isa<SCEVConstant>(Value));
+ auto It = ValuesAtScopes.find(Value);
+ if (It != ValuesAtScopes.end() &&
+ is_contained(It->second, std::make_pair(L, ValueAtScope)))
+ continue;
+ dbgs() << "Value: " << *Value << ", Loop: " << *L << ", ValueAtScope: "
+ << *ValueAtScope << " missing in ValuesAtScopes\n";
+ std::abort();
+ }
+ }
+
+ // Verify integrity of BECountUsers.
+ auto VerifyBECountUsers = [&](bool Predicated) {
+ auto &BECounts =
+ Predicated ? PredicatedBackedgeTakenCounts : BackedgeTakenCounts;
+ for (const auto &LoopAndBEInfo : BECounts) {
+ for (const ExitNotTakenInfo &ENT : LoopAndBEInfo.second.ExitNotTaken) {
+ if (!isa<SCEVConstant>(ENT.ExactNotTaken)) {
+ auto UserIt = BECountUsers.find(ENT.ExactNotTaken);
+ if (UserIt != BECountUsers.end() &&
+ UserIt->second.contains({ LoopAndBEInfo.first, Predicated }))
+ continue;
+ dbgs() << "Value " << *ENT.ExactNotTaken << " for loop "
+ << *LoopAndBEInfo.first << " missing from BECountUsers\n";
+ std::abort();
+ }
+ }
+ }
+ };
+ VerifyBECountUsers(/* Predicated */ false);
+ VerifyBECountUsers(/* Predicated */ true);
}
bool ScalarEvolution::invalidate(
diff --git a/contrib/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp
index 74cc39b7f2c0..54f3605ee033 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/StackSafetyAnalysis.cpp
@@ -14,12 +14,14 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/StackLifetime.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/ModuleSummaryIndex.h"
@@ -117,7 +119,7 @@ template <typename CalleeTy> struct UseInfo {
// Access range if the address (alloca or parameters).
// It is allowed to be empty-set when there are no known accesses.
ConstantRange Range;
- std::map<const Instruction *, ConstantRange> Accesses;
+ std::set<const Instruction *> UnsafeAccesses;
// List of calls which pass address as an argument.
// Value is offset range of address from base address (alloca or calling
@@ -131,10 +133,9 @@ template <typename CalleeTy> struct UseInfo {
UseInfo(unsigned PointerSize) : Range{PointerSize, false} {}
void updateRange(const ConstantRange &R) { Range = unionNoWrap(Range, R); }
- void addRange(const Instruction *I, const ConstantRange &R) {
- auto Ins = Accesses.emplace(I, R);
- if (!Ins.second)
- Ins.first->second = unionNoWrap(Ins.first->second, R);
+ void addRange(const Instruction *I, const ConstantRange &R, bool IsSafe) {
+ if (!IsSafe)
+ UnsafeAccesses.insert(I);
updateRange(R);
}
};
@@ -230,7 +231,7 @@ struct StackSafetyInfo::InfoTy {
struct StackSafetyGlobalInfo::InfoTy {
GVToSSI Info;
SmallPtrSet<const AllocaInst *, 8> SafeAllocas;
- std::map<const Instruction *, bool> AccessIsUnsafe;
+ std::set<const Instruction *> UnsafeAccesses;
};
namespace {
@@ -253,6 +254,11 @@ class StackSafetyLocalAnalysis {
void analyzeAllUses(Value *Ptr, UseInfo<GlobalValue> &AS,
const StackLifetime &SL);
+
+ bool isSafeAccess(const Use &U, AllocaInst *AI, const SCEV *AccessSize);
+ bool isSafeAccess(const Use &U, AllocaInst *AI, Value *V);
+ bool isSafeAccess(const Use &U, AllocaInst *AI, TypeSize AccessSize);
+
public:
StackSafetyLocalAnalysis(Function &F, ScalarEvolution &SE)
: F(F), DL(F.getParent()->getDataLayout()), SE(SE),
@@ -333,6 +339,56 @@ ConstantRange StackSafetyLocalAnalysis::getMemIntrinsicAccessRange(
return getAccessRange(U, Base, SizeRange);
}
+bool StackSafetyLocalAnalysis::isSafeAccess(const Use &U, AllocaInst *AI,
+ Value *V) {
+ return isSafeAccess(U, AI, SE.getSCEV(V));
+}
+
+bool StackSafetyLocalAnalysis::isSafeAccess(const Use &U, AllocaInst *AI,
+ TypeSize TS) {
+ if (TS.isScalable())
+ return false;
+ auto *CalculationTy = IntegerType::getIntNTy(SE.getContext(), PointerSize);
+ const SCEV *SV = SE.getConstant(CalculationTy, TS.getFixedSize());
+ return isSafeAccess(U, AI, SV);
+}
+
+bool StackSafetyLocalAnalysis::isSafeAccess(const Use &U, AllocaInst *AI,
+ const SCEV *AccessSize) {
+
+ if (!AI)
+ return true;
+ if (isa<SCEVCouldNotCompute>(AccessSize))
+ return false;
+
+ const auto *I = cast<Instruction>(U.getUser());
+
+ auto ToCharPtr = [&](const SCEV *V) {
+ auto *PtrTy = IntegerType::getInt8PtrTy(SE.getContext());
+ return SE.getTruncateOrZeroExtend(V, PtrTy);
+ };
+
+ const SCEV *AddrExp = ToCharPtr(SE.getSCEV(U.get()));
+ const SCEV *BaseExp = ToCharPtr(SE.getSCEV(AI));
+ const SCEV *Diff = SE.getMinusSCEV(AddrExp, BaseExp);
+ if (isa<SCEVCouldNotCompute>(Diff))
+ return false;
+
+ auto Size = getStaticAllocaSizeRange(*AI);
+
+ auto *CalculationTy = IntegerType::getIntNTy(SE.getContext(), PointerSize);
+ auto ToDiffTy = [&](const SCEV *V) {
+ return SE.getTruncateOrZeroExtend(V, CalculationTy);
+ };
+ const SCEV *Min = ToDiffTy(SE.getConstant(Size.getLower()));
+ const SCEV *Max = SE.getMinusSCEV(ToDiffTy(SE.getConstant(Size.getUpper())),
+ ToDiffTy(AccessSize));
+ return SE.evaluatePredicateAt(ICmpInst::Predicate::ICMP_SGE, Diff, Min, I)
+ .getValueOr(false) &&
+ SE.evaluatePredicateAt(ICmpInst::Predicate::ICMP_SLE, Diff, Max, I)
+ .getValueOr(false);
+}
+
/// The function analyzes all local uses of Ptr (alloca or argument) and
/// calculates local access range and all function calls where it was used.
void StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
@@ -341,7 +397,7 @@ void StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
SmallPtrSet<const Value *, 16> Visited;
SmallVector<const Value *, 8> WorkList;
WorkList.push_back(Ptr);
- const AllocaInst *AI = dyn_cast<AllocaInst>(Ptr);
+ AllocaInst *AI = dyn_cast<AllocaInst>(Ptr);
// A DFS search through all uses of the alloca in bitcasts/PHI/GEPs/etc.
while (!WorkList.empty()) {
@@ -356,11 +412,13 @@ void StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
switch (I->getOpcode()) {
case Instruction::Load: {
if (AI && !SL.isAliveAfter(AI, I)) {
- US.addRange(I, UnknownRange);
+ US.addRange(I, UnknownRange, /*IsSafe=*/false);
break;
}
- US.addRange(I,
- getAccessRange(UI, Ptr, DL.getTypeStoreSize(I->getType())));
+ auto TypeSize = DL.getTypeStoreSize(I->getType());
+ auto AccessRange = getAccessRange(UI, Ptr, TypeSize);
+ bool Safe = isSafeAccess(UI, AI, TypeSize);
+ US.addRange(I, AccessRange, Safe);
break;
}
@@ -370,16 +428,17 @@ void StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
case Instruction::Store: {
if (V == I->getOperand(0)) {
// Stored the pointer - conservatively assume it may be unsafe.
- US.addRange(I, UnknownRange);
+ US.addRange(I, UnknownRange, /*IsSafe=*/false);
break;
}
if (AI && !SL.isAliveAfter(AI, I)) {
- US.addRange(I, UnknownRange);
+ US.addRange(I, UnknownRange, /*IsSafe=*/false);
break;
}
- US.addRange(
- I, getAccessRange(
- UI, Ptr, DL.getTypeStoreSize(I->getOperand(0)->getType())));
+ auto TypeSize = DL.getTypeStoreSize(I->getOperand(0)->getType());
+ auto AccessRange = getAccessRange(UI, Ptr, TypeSize);
+ bool Safe = isSafeAccess(UI, AI, TypeSize);
+ US.addRange(I, AccessRange, Safe);
break;
}
@@ -387,7 +446,7 @@ void StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
// Information leak.
// FIXME: Process parameters correctly. This is a leak only if we return
// alloca.
- US.addRange(I, UnknownRange);
+ US.addRange(I, UnknownRange, /*IsSafe=*/false);
break;
case Instruction::Call:
@@ -396,12 +455,20 @@ void StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
break;
if (AI && !SL.isAliveAfter(AI, I)) {
- US.addRange(I, UnknownRange);
+ US.addRange(I, UnknownRange, /*IsSafe=*/false);
break;
}
-
if (const MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
- US.addRange(I, getMemIntrinsicAccessRange(MI, UI, Ptr));
+ auto AccessRange = getMemIntrinsicAccessRange(MI, UI, Ptr);
+ bool Safe = false;
+ if (const auto *MTI = dyn_cast<MemTransferInst>(MI)) {
+ if (MTI->getRawSource() != UI && MTI->getRawDest() != UI)
+ Safe = true;
+ } else if (MI->getRawDest() != UI) {
+ Safe = true;
+ }
+ Safe = Safe || isSafeAccess(UI, AI, MI->getLength());
+ US.addRange(I, AccessRange, Safe);
break;
}
@@ -412,15 +479,16 @@ void StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
}
if (!CB.isArgOperand(&UI)) {
- US.addRange(I, UnknownRange);
+ US.addRange(I, UnknownRange, /*IsSafe=*/false);
break;
}
unsigned ArgNo = CB.getArgOperandNo(&UI);
if (CB.isByValArgument(ArgNo)) {
- US.addRange(I, getAccessRange(
- UI, Ptr,
- DL.getTypeStoreSize(CB.getParamByValType(ArgNo))));
+ auto TypeSize = DL.getTypeStoreSize(CB.getParamByValType(ArgNo));
+ auto AccessRange = getAccessRange(UI, Ptr, TypeSize);
+ bool Safe = isSafeAccess(UI, AI, TypeSize);
+ US.addRange(I, AccessRange, Safe);
break;
}
@@ -430,7 +498,7 @@ void StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
const GlobalValue *Callee =
dyn_cast<GlobalValue>(CB.getCalledOperand()->stripPointerCasts());
if (!Callee) {
- US.addRange(I, UnknownRange);
+ US.addRange(I, UnknownRange, /*IsSafe=*/false);
break;
}
@@ -827,8 +895,8 @@ const StackSafetyGlobalInfo::InfoTy &StackSafetyGlobalInfo::getInfo() const {
Info->SafeAllocas.insert(AI);
++NumAllocaStackSafe;
}
- for (const auto &A : KV.second.Accesses)
- Info->AccessIsUnsafe[A.first] |= !AIRange.contains(A.second);
+ Info->UnsafeAccesses.insert(KV.second.UnsafeAccesses.begin(),
+ KV.second.UnsafeAccesses.end());
}
}
@@ -903,11 +971,7 @@ bool StackSafetyGlobalInfo::isSafe(const AllocaInst &AI) const {
bool StackSafetyGlobalInfo::stackAccessIsSafe(const Instruction &I) const {
const auto &Info = getInfo();
- auto It = Info.AccessIsUnsafe.find(&I);
- if (It == Info.AccessIsUnsafe.end()) {
- return true;
- }
- return !It->second;
+ return Info.UnsafeAccesses.find(&I) == Info.UnsafeAccesses.end();
}
void StackSafetyGlobalInfo::print(raw_ostream &O) const {
diff --git a/contrib/llvm-project/llvm/lib/Analysis/SyncDependenceAnalysis.cpp b/contrib/llvm-project/llvm/lib/Analysis/SyncDependenceAnalysis.cpp
index 59582cd3a198..ff833b55bbce 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/SyncDependenceAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/SyncDependenceAnalysis.cpp
@@ -15,21 +15,18 @@
// The SyncDependenceAnalysis is used in the DivergenceAnalysis to model
// control-induced divergence in phi nodes.
//
-// -- Summary --
-// The SyncDependenceAnalysis lazily computes sync dependences [3].
-// The analysis evaluates the disjoint path criterion [2] by a reduction
-// to SSA construction. The SSA construction algorithm is implemented as
-// a simple data-flow analysis [1].
//
-// [1] "A Simple, Fast Dominance Algorithm", SPI '01, Cooper, Harvey and Kennedy
-// [2] "Efficiently Computing Static Single Assignment Form
-// and the Control Dependence Graph", TOPLAS '91,
-// Cytron, Ferrante, Rosen, Wegman and Zadeck
-// [3] "Improving Performance of OpenCL on CPUs", CC '12, Karrenberg and Hack
-// [4] "Divergence Analysis", TOPLAS '13, Sampaio, Souza, Collange and Pereira
+// -- Reference --
+// The algorithm is presented in Section 5 of
+//
+// An abstract interpretation for SPMD divergence
+// on reducible control flow graphs.
+// Julian Rosemann, Simon Moll and Sebastian Hack
+// POPL '21
+//
//
// -- Sync dependence --
-// Sync dependence [4] characterizes the control flow aspect of the
+// Sync dependence characterizes the control flow aspect of the
// propagation of branch divergence. For example,
//
// %cond = icmp slt i32 %tid, 10
@@ -46,9 +43,10 @@
// because the branch "br i1 %cond" depends on %tid and affects which value %a
// is assigned to.
//
+//
// -- Reduction to SSA construction --
// There are two disjoint paths from A to X, if a certain variant of SSA
-// construction places a phi node in X under the following set-up scheme [2].
+// construction places a phi node in X under the following set-up scheme.
//
// This variant of SSA construction ignores incoming undef values.
// That is paths from the entry without a definition do not result in
@@ -63,6 +61,7 @@
// D E
// \ /
// F
+//
// Assume that A contains a divergent branch. We are interested
// in the set of all blocks where each block is reachable from A
// via two disjoint paths. This would be the set {D, F} in this
@@ -70,6 +69,7 @@
// To generally reduce this query to SSA construction we introduce
// a virtual variable x and assign to x different values in each
// successor block of A.
+//
// entry
// / \
// A \
@@ -79,23 +79,41 @@
// D E
// \ /
// F
+//
// Our flavor of SSA construction for x will construct the following
+//
// entry
// / \
// A \
// / \ Y
// x0 = 0 x1 = 1 /
// \ / \ /
-// x2=phi E
+// x2 = phi E
// \ /
-// x3=phi
+// x3 = phi
+//
// The blocks D and F contain phi nodes and are thus each reachable
// by two disjoint paths from A.
//
// -- Remarks --
-// In case of loop exits we need to check the disjoint path criterion for loops
-// [2]. To this end, we check whether the definition of x differs between the
-// loop exit and the loop header (_after_ SSA construction).
+// * In case of loop exits we need to check the disjoint path criterion for loops.
+// To this end, we check whether the definition of x differs between the
+// loop exit and the loop header (_after_ SSA construction).
+//
+// -- Known Limitations & Future Work --
+// * The algorithm requires reducible loops because the implementation
+// implicitly performs a single iteration of the underlying data flow analysis.
+// This was done for pragmatism, simplicity and speed.
+//
+// Relevant related work for extending the algorithm to irreducible control:
+// A simple algorithm for global data flow analysis problems.
+// Matthew S. Hecht and Jeffrey D. Ullman.
+// SIAM Journal on Computing, 4(4):519–532, December 1975.
+//
+// * Another reason for requiring reducible loops is that points of
+// synchronization in irreducible loops aren't 'obvious' - there is no unique
+// header where threads 'should' synchronize when entering or coming back
+// around from the latch.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/SyncDependenceAnalysis.h"
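
The rewritten header comment above describes sync dependence in terms of LLVM IR; the same situation in source form, as a hypothetical SPMD kernel (illustration only, not part of the patch):

// Both assignments store uniform constants, yet the value of 'a' observed
// after the join depends on the divergent branch on 'tid', so the join's phi
// is sync-dependent on that branch and therefore divergent.
void kernel(int tid, int *out) {
  int a;
  if (tid < 10)
    a = 0; // threads 0..9
  else
    a = 1; // all other threads
  out[tid] = a;
}
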
@@ -128,8 +146,9 @@ using namespace llvm;
//
// We cannot use the vanilla (R)PO computation of LLVM because:
// * We (virtually) modify the CFG.
-// * We want a loop-compact block enumeration, that is the numbers assigned by
-// the traveral to the blocks of a loop are an interval.
+// * We want a loop-compact block enumeration, that is, the numbers assigned
+// to the blocks of a loop form an interval.
+//
using POCB = std::function<void(const BasicBlock &)>;
using VisitedSet = std::set<const BasicBlock *>;
using BlockStack = std::vector<const BasicBlock *>;
diff --git a/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp
index 7326ba74c071..72fbd5ad3f68 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -166,8 +166,8 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
return;
}
- // memset_pattern16 is only available on iOS 3.0 and Mac OS X 10.5 and later.
- // All versions of watchOS support it.
+ // memset_pattern{4,8,16} is only available on iOS 3.0 and Mac OS X 10.5 and
+ // later. All versions of watchOS support it.
if (T.isMacOSX()) {
// available IO unlocked variants on Mac OS X
TLI.setAvailable(LibFunc_getc_unlocked);
@@ -175,12 +175,20 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setAvailable(LibFunc_putc_unlocked);
TLI.setAvailable(LibFunc_putchar_unlocked);
- if (T.isMacOSXVersionLT(10, 5))
+ if (T.isMacOSXVersionLT(10, 5)) {
+ TLI.setUnavailable(LibFunc_memset_pattern4);
+ TLI.setUnavailable(LibFunc_memset_pattern8);
TLI.setUnavailable(LibFunc_memset_pattern16);
+ }
} else if (T.isiOS()) {
- if (T.isOSVersionLT(3, 0))
+ if (T.isOSVersionLT(3, 0)) {
+ TLI.setUnavailable(LibFunc_memset_pattern4);
+ TLI.setUnavailable(LibFunc_memset_pattern8);
TLI.setUnavailable(LibFunc_memset_pattern16);
+ }
} else if (!T.isWatchOS()) {
+ TLI.setUnavailable(LibFunc_memset_pattern4);
+ TLI.setUnavailable(LibFunc_memset_pattern8);
TLI.setUnavailable(LibFunc_memset_pattern16);
}
@@ -684,7 +692,6 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc_strcat_chk);
TLI.setUnavailable(LibFunc_strcpy_chk);
TLI.setUnavailable(LibFunc_strlcat_chk);
- TLI.setUnavailable(LibFunc_strlcat_chk);
TLI.setUnavailable(LibFunc_strlcpy_chk);
TLI.setUnavailable(LibFunc_strlen_chk);
TLI.setUnavailable(LibFunc_strncat_chk);
@@ -1523,6 +1530,8 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
FTy.getParamType(2)->isPointerTy() &&
FTy.getParamType(3)->isIntegerTy());
+ case LibFunc_memset_pattern4:
+ case LibFunc_memset_pattern8:
case LibFunc_memset_pattern16:
return (!FTy.isVarArg() && NumParams == 3 &&
FTy.getParamType(0)->isPointerTy() &&
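
The prototype check above accepts the new memset_pattern4/memset_pattern8 entries with the same shape as memset_pattern16: three parameters (two pointers and an integer length) and no return value. For reference, the Darwin declarations look roughly like this (memset_pattern16 is documented in Apple's <string.h>; the 4- and 8-byte variants are assumed to mirror it):

#include <stddef.h>

// Darwin-style declarations matching the prototype check above.
extern "C" {
void memset_pattern4(void *b, const void *pattern4, size_t len);
void memset_pattern8(void *b, const void *pattern8, size_t len);
void memset_pattern16(void *b, const void *pattern16, size_t len);
}
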
diff --git a/contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp b/contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp
index 8a34a34eb307..7573975a3dd3 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/VFABIDemangling.cpp
@@ -445,7 +445,6 @@ Optional<VFInfo> VFABI::tryDemangleForVFABI(StringRef MangledName,
VF = EC.getKnownMinValue();
}
- // Sanity checks.
// 1. We don't accept a zero lanes vectorization factor.
// 2. We don't accept the demangling if the vector function is not
// present in the module.
diff --git a/contrib/llvm-project/llvm/lib/AsmParser/LLParser.cpp b/contrib/llvm-project/llvm/lib/AsmParser/LLParser.cpp
index 5bce1eaa59a0..5feabd876e3a 100644
--- a/contrib/llvm-project/llvm/lib/AsmParser/LLParser.cpp
+++ b/contrib/llvm-project/llvm/lib/AsmParser/LLParser.cpp
@@ -124,8 +124,8 @@ void LLParser::restoreParsingState(const SlotMapping *Slots) {
std::make_pair(I.first, std::make_pair(I.second, LocTy())));
}
-/// validateEndOfModule - Do final validity and sanity checks at the end of the
-/// module.
+/// validateEndOfModule - Do final validity and basic correctness checks at the
+/// end of the module.
bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
if (!M)
return false;
@@ -271,7 +271,7 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
return false;
}
-/// Do final validity and sanity checks at the end of the index.
+/// Do final validity and basic correctness checks at the end of the index.
bool LLParser::validateEndOfIndex() {
if (!Index)
return false;
@@ -2989,9 +2989,10 @@ BasicBlock *LLParser::PerFunctionState::defineBB(const std::string &Name,
/// parseValID - parse an abstract value that doesn't necessarily have a
/// type implied. For example, if we parse "4" we don't know what integer type
/// it has. The value will later be combined with its type and checked for
-/// sanity. PFS is used to convert function-local operands of metadata (since
-/// metadata operands are not just parsed here but also converted to values).
-/// PFS can be null when we are not parsing metadata values inside a function.
+/// basic correctness. PFS is used to convert function-local operands of
+/// metadata (since metadata operands are not just parsed here but also
+/// converted to values). PFS can be null when we are not parsing metadata
+/// values inside a function.
bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS, Type *ExpectedTy) {
ID.Loc = Lex.getLoc();
switch (Lex.getKind()) {
diff --git a/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
index 2723105b092f..d7bcb0d7f575 100644
--- a/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
+++ b/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
@@ -957,8 +957,8 @@ Error BitcodeAnalyzer::parseBlock(unsigned BlockID, unsigned IndentLevel,
O->OS.write_escaped(Blob, /*hex=*/true) << "'";
} else {
bool BlobIsPrintable = true;
- for (unsigned i = 0, e = Blob.size(); i != e; ++i)
- if (!isPrint(static_cast<unsigned char>(Blob[i]))) {
+ for (char C : Blob)
+ if (!isPrint(static_cast<unsigned char>(C))) {
BlobIsPrintable = false;
break;
}
diff --git a/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index c568461e62b0..993cb1de8c02 100644
--- a/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -3996,8 +3996,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
// See if anything took the address of blocks in this function.
auto BBFRI = BasicBlockFwdRefs.find(F);
if (BBFRI == BasicBlockFwdRefs.end()) {
- for (unsigned i = 0, e = FunctionBBs.size(); i != e; ++i)
- FunctionBBs[i] = BasicBlock::Create(Context, "", F);
+ for (BasicBlock *&BB : FunctionBBs)
+ BB = BasicBlock::Create(Context, "", F);
} else {
auto &BBRefs = BBFRI->second;
// Check for invalid basic block references.
@@ -4605,9 +4605,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
CaseVals.push_back(ConstantInt::get(Context, Low));
}
BasicBlock *DestBB = getBasicBlock(Record[CurIdx++]);
- for (SmallVector<ConstantInt*, 1>::iterator cvi = CaseVals.begin(),
- cve = CaseVals.end(); cvi != cve; ++cvi)
- SI->addCase(*cvi, DestBB);
+ for (ConstantInt *Cst : CaseVals)
+ SI->addCase(Cst, DestBB);
}
I = SI;
break;
diff --git a/contrib/llvm-project/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/contrib/llvm-project/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
index 6df5a4a64d51..60530d7f7a00 100644
--- a/contrib/llvm-project/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/contrib/llvm-project/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -444,7 +444,8 @@ class MetadataLoader::MetadataLoaderImpl {
uint64_t GlobalDeclAttachmentPos = 0;
#ifndef NDEBUG
- /// Sanity check that we end up parsing all of the global decl attachments.
+ /// Basic correctness check that we end up parsing all of the global decl
+ /// attachments.
unsigned NumGlobalDeclAttachSkipped = 0;
unsigned NumGlobalDeclAttachParsed = 0;
#endif
@@ -917,7 +918,7 @@ Expected<bool> MetadataLoader::MetadataLoaderImpl::loadGlobalDeclAttachments() {
case BitstreamEntry::Error:
return error("Malformed block");
case BitstreamEntry::EndBlock:
- // Sanity check that we parsed them all.
+ // Check that we parsed them all.
assert(NumGlobalDeclAttachSkipped == NumGlobalDeclAttachParsed);
return true;
case BitstreamEntry::Record:
@@ -929,7 +930,7 @@ Expected<bool> MetadataLoader::MetadataLoaderImpl::loadGlobalDeclAttachments() {
return MaybeCode.takeError();
if (MaybeCode.get() != bitc::METADATA_GLOBAL_DECL_ATTACHMENT) {
// Anything other than a global decl attachment signals the end of
- // these records. sanity check that we parsed them all.
+ // these records. Check that we parsed them all.
assert(NumGlobalDeclAttachSkipped == NumGlobalDeclAttachParsed);
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 1e9a9197aed7..e2354c40844a 100644
--- a/contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -596,10 +596,10 @@ static void writeStringRecord(BitstreamWriter &Stream, unsigned Code,
SmallVector<unsigned, 64> Vals;
// Code: [strchar x N]
- for (unsigned i = 0, e = Str.size(); i != e; ++i) {
- if (AbbrevToUse && !BitCodeAbbrevOp::isChar6(Str[i]))
+ for (char C : Str) {
+ if (AbbrevToUse && !BitCodeAbbrevOp::isChar6(C))
AbbrevToUse = 0;
- Vals.push_back(Str[i]);
+ Vals.push_back(C);
}
// Emit the finished record.
@@ -914,8 +914,7 @@ void ModuleBitcodeWriter::writeTypeTable() {
TypeVals.clear();
// Loop over all of the types, emitting each in turn.
- for (unsigned i = 0, e = TypeList.size(); i != e; ++i) {
- Type *T = TypeList[i];
+ for (Type *T : TypeList) {
int AbbrevToUse = 0;
unsigned Code = 0;
@@ -3343,19 +3342,18 @@ void ModuleBitcodeWriter::writeFunction(
DILocation *LastDL = nullptr;
// Finally, emit all the instructions, in order.
- for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
- I != E; ++I) {
- writeInstruction(*I, InstID, Vals);
+ for (const BasicBlock &BB : F)
+ for (const Instruction &I : BB) {
+ writeInstruction(I, InstID, Vals);
- if (!I->getType()->isVoidTy())
+ if (!I.getType()->isVoidTy())
++InstID;
// If the instruction has metadata, write a metadata attachment later.
- NeedsMetadataAttachment |= I->hasMetadataOtherThanDebugLoc();
+ NeedsMetadataAttachment |= I.hasMetadataOtherThanDebugLoc();
// If the instruction has a debug location, emit it.
- DILocation *DL = I->getDebugLoc();
+ DILocation *DL = I.getDebugLoc();
if (!DL)
continue;
@@ -4429,9 +4427,9 @@ void ModuleBitcodeWriter::write() {
// Emit function bodies.
DenseMap<const Function *, uint64_t> FunctionToBitcodeIndex;
- for (Module::const_iterator F = M.begin(), E = M.end(); F != E; ++F)
- if (!F->isDeclaration())
- writeFunction(*F, FunctionToBitcodeIndex);
+ for (const Function &F : M)
+ if (!F.isDeclaration())
+ writeFunction(F, FunctionToBitcodeIndex);
// Need to write after the above call to WriteFunction which populates
// the summary information in the index.
diff --git a/contrib/llvm-project/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/contrib/llvm-project/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
index 9465a3b11c8f..07e0708e68c3 100644
--- a/contrib/llvm-project/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/contrib/llvm-project/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -1148,8 +1148,8 @@ void ValueEnumerator::purgeFunction() {
ValueMap.erase(Values[i].first);
for (unsigned i = NumModuleMDs, e = MDs.size(); i != e; ++i)
MetadataMap.erase(MDs[i]);
- for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i)
- ValueMap.erase(BasicBlocks[i]);
+ for (const BasicBlock *BB : BasicBlocks)
+ ValueMap.erase(BB);
Values.resize(NumModuleValues);
MDs.resize(NumModuleMDs);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 87a3cede601b..5984063627b0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -354,8 +354,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(
// dead, or because only a subregister is live at the def. If we
// don't do this the dead def will be incorrectly merged into the
// previous def.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isDef()) continue;
Register Reg = MO.getReg();
if (Reg == 0) continue;
@@ -407,8 +406,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(
// Scan the register defs for this instruction and update
// live-ranges.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isDef()) continue;
Register Reg = MO.getReg();
if (Reg == 0) continue;
@@ -495,8 +493,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI,
LLVM_DEBUG(dbgs() << "\tKill Group:");
unsigned FirstReg = 0;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg()) continue;
Register Reg = MO.getReg();
if (Reg == 0) continue;
@@ -762,11 +759,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// ...need a map from MI to SUnit.
std::map<MachineInstr *, const SUnit *> MISUnitMap;
- for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
- const SUnit *SU = &SUnits[i];
- MISUnitMap.insert(std::pair<MachineInstr *, const SUnit *>(SU->getInstr(),
- SU));
- }
+ for (const SUnit &SU : SUnits)
+ MISUnitMap.insert(std::make_pair(SU.getInstr(), &SU));
// Track progress along the critical path through the SUnit graph as
// we walk the instructions. This is needed for regclasses that only
@@ -774,12 +768,11 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
const SUnit *CriticalPathSU = nullptr;
MachineInstr *CriticalPathMI = nullptr;
if (CriticalPathSet.any()) {
- for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
- const SUnit *SU = &SUnits[i];
+ for (const SUnit &SU : SUnits) {
if (!CriticalPathSU ||
- ((SU->getDepth() + SU->Latency) >
+ ((SU.getDepth() + SU.Latency) >
(CriticalPathSU->getDepth() + CriticalPathSU->Latency))) {
- CriticalPathSU = SU;
+ CriticalPathSU = &SU;
}
}
assert(CriticalPathSU && "Failed to find SUnit critical path");
@@ -839,8 +832,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// but don't cause any anti-dependence breaking themselves)
if (!MI.isKill()) {
// Attempt to break each anti-dependency...
- for (unsigned i = 0, e = Edges.size(); i != e; ++i) {
- const SDep *Edge = Edges[i];
+ for (const SDep *Edge : Edges) {
SUnit *NextSU = Edge->getSUnit();
if ((Edge->getKind() != SDep::Anti) &&
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index cc848d28a9a7..828cb760b82e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -809,9 +809,9 @@ void AsmPrinter::emitFunctionHeader() {
// so that we don't get references to undefined symbols.
std::vector<MCSymbol*> DeadBlockSyms;
MMI->takeDeletedSymbolsForFunction(&F, DeadBlockSyms);
- for (unsigned i = 0, e = DeadBlockSyms.size(); i != e; ++i) {
+ for (MCSymbol *DeadBlockSym : DeadBlockSyms) {
OutStreamer->AddComment("Address taken block that was later removed");
- OutStreamer->emitLabel(DeadBlockSyms[i]);
+ OutStreamer->emitLabel(DeadBlockSym);
}
if (CurrentFnBegin) {
@@ -910,8 +910,7 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
std::string Str;
raw_string_ostream OS(Str);
OS << "kill:";
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &Op = MI->getOperand(i);
+ for (const MachineOperand &Op : MI->operands()) {
assert(Op.isReg() && "KILL instruction must have only register operands");
OS << ' ' << (Op.isDef() ? "def " : "killed ")
<< printReg(Op.getReg(), AP.MF->getSubtarget().getRegisterInfo());
@@ -2150,8 +2149,7 @@ void AsmPrinter::emitJumpTableInfo() {
SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets;
const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext);
- for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) {
- const MachineBasicBlock *MBB = JTBBs[ii];
+ for (const MachineBasicBlock *MBB : JTBBs) {
if (!EmittedSets.insert(MBB).second)
continue;
@@ -2177,8 +2175,8 @@ void AsmPrinter::emitJumpTableInfo() {
MCSymbol* JTISymbol = GetJTISymbol(JTI);
OutStreamer->emitLabel(JTISymbol);
- for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii)
- emitJumpTableEntry(MJTI, JTBBs[ii], JTI);
+ for (const MachineBasicBlock *MBB : JTBBs)
+ emitJumpTableEntry(MJTI, MBB, JTI);
}
if (!JTInDiffSection)
OutStreamer->emitDataRegion(MCDR_DataRegionEnd);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index ef1abc47701a..5d0cadefdbf7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -128,191 +128,29 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
emitInlineAsmEnd(STI, &TAP->getSTI());
}
-static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
- MachineModuleInfo *MMI, const MCAsmInfo *MAI,
- AsmPrinter *AP, uint64_t LocCookie,
- raw_ostream &OS) {
- // Switch to the inline assembly variant.
- OS << "\t.intel_syntax\n\t";
-
- int CurVariant = -1; // The number of the {.|.|.} region we are in.
- const char *LastEmitted = AsmStr; // One past the last character emitted.
- unsigned NumOperands = MI->getNumOperands();
- int AsmPrinterVariant = 1; // X86MCAsmInfo.cpp's AsmWriterFlavorTy::Intel.
-
- while (*LastEmitted) {
- switch (*LastEmitted) {
- default: {
- // Not a special case, emit the string section literally.
- const char *LiteralEnd = LastEmitted+1;
- while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' &&
- *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
- ++LiteralEnd;
- if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
- OS.write(LastEmitted, LiteralEnd - LastEmitted);
- LastEmitted = LiteralEnd;
- break;
- }
- case '\n':
- ++LastEmitted; // Consume newline character.
- OS << '\n'; // Indent code with newline.
- break;
- case '$': {
- ++LastEmitted; // Consume '$' character.
- bool Done = true;
-
- // Handle escapes.
- switch (*LastEmitted) {
- default: Done = false; break;
- case '$':
- ++LastEmitted; // Consume second '$' character.
- break;
- case '(': // $( -> same as GCC's { character.
- ++LastEmitted; // Consume '(' character.
- if (CurVariant != -1)
- report_fatal_error("Nested variants found in inline asm string: '" +
- Twine(AsmStr) + "'");
- CurVariant = 0; // We're in the first variant now.
- break;
- case '|':
- ++LastEmitted; // Consume '|' character.
- if (CurVariant == -1)
- OS << '|'; // This is gcc's behavior for | outside a variant.
- else
- ++CurVariant; // We're in the next variant.
- break;
- case ')': // $) -> same as GCC's } char.
- ++LastEmitted; // Consume ')' character.
- if (CurVariant == -1)
- OS << '}'; // This is gcc's behavior for } outside a variant.
- else
- CurVariant = -1;
- break;
- }
- if (Done) break;
-
- bool HasCurlyBraces = false;
- if (*LastEmitted == '{') { // ${variable}
- ++LastEmitted; // Consume '{' character.
- HasCurlyBraces = true;
- }
-
- // If we have ${:foo}, then this is not a real operand reference, it is a
- // "magic" string reference, just like in .td files. Arrange to call
- // PrintSpecial.
- if (HasCurlyBraces && *LastEmitted == ':') {
- ++LastEmitted;
- const char *StrStart = LastEmitted;
- const char *StrEnd = strchr(StrStart, '}');
- if (!StrEnd)
- report_fatal_error("Unterminated ${:foo} operand in inline asm"
- " string: '" + Twine(AsmStr) + "'");
- if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
- AP->PrintSpecial(MI, OS, StringRef(StrStart, StrEnd - StrStart));
- LastEmitted = StrEnd+1;
- break;
- }
-
- const char *IDStart = LastEmitted;
- const char *IDEnd = IDStart;
- while (isDigit(*IDEnd))
- ++IDEnd;
-
- unsigned Val;
- if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val))
- report_fatal_error("Bad $ operand number in inline asm string: '" +
- Twine(AsmStr) + "'");
- LastEmitted = IDEnd;
-
- if (Val >= NumOperands - 1)
- report_fatal_error("Invalid $ operand number in inline asm string: '" +
- Twine(AsmStr) + "'");
-
- char Modifier[2] = { 0, 0 };
-
- if (HasCurlyBraces) {
- // If we have curly braces, check for a modifier character. This
- // supports syntax like ${0:u}, which correspond to "%u0" in GCC asm.
- if (*LastEmitted == ':') {
- ++LastEmitted; // Consume ':' character.
- if (*LastEmitted == 0)
- report_fatal_error("Bad ${:} expression in inline asm string: '" +
- Twine(AsmStr) + "'");
-
- Modifier[0] = *LastEmitted;
- ++LastEmitted; // Consume modifier character.
- }
-
- if (*LastEmitted != '}')
- report_fatal_error("Bad ${} expression in inline asm string: '" +
- Twine(AsmStr) + "'");
- ++LastEmitted; // Consume '}' character.
- }
-
- // Okay, we finally have a value number. Ask the target to print this
- // operand!
- if (CurVariant == -1 || CurVariant == AsmPrinterVariant) {
- unsigned OpNo = InlineAsm::MIOp_FirstOperand;
-
- bool Error = false;
-
- // Scan to find the machine operand number for the operand.
- for (; Val; --Val) {
- if (OpNo >= MI->getNumOperands())
- break;
- unsigned OpFlags = MI->getOperand(OpNo).getImm();
- OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
- }
-
- // We may have a location metadata attached to the end of the
- // instruction, and at no point should see metadata at any
- // other point while processing. It's an error if so.
- if (OpNo >= MI->getNumOperands() || MI->getOperand(OpNo).isMetadata()) {
- Error = true;
- } else {
- unsigned OpFlags = MI->getOperand(OpNo).getImm();
- ++OpNo; // Skip over the ID number.
-
- // FIXME: Shouldn't arch-independent output template handling go into
- // PrintAsmOperand?
- // Labels are target independent.
- if (MI->getOperand(OpNo).isBlockAddress()) {
- const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress();
- MCSymbol *Sym = AP->GetBlockAddressSymbol(BA);
- Sym->print(OS, AP->MAI);
- MMI->getContext().registerInlineAsmLabel(Sym);
- } else if (InlineAsm::isMemKind(OpFlags)) {
- Error = AP->PrintAsmMemoryOperand(
- MI, OpNo, Modifier[0] ? Modifier : nullptr, OS);
- } else {
- Error = AP->PrintAsmOperand(MI, OpNo,
- Modifier[0] ? Modifier : nullptr, OS);
- }
- }
- if (Error) {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "invalid operand in inline asm: '" << AsmStr << "'";
- MMI->getModule()->getContext().emitError(LocCookie, Msg.str());
- }
- }
- break;
- }
- }
+static void EmitInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
+ MachineModuleInfo *MMI, const MCAsmInfo *MAI,
+ AsmPrinter *AP, uint64_t LocCookie,
+ raw_ostream &OS) {
+ bool InputIsIntelDialect = MI->getInlineAsmDialect() == InlineAsm::AD_Intel;
+
+ if (InputIsIntelDialect) {
+ // Switch to the inline assembly variant.
+ OS << "\t.intel_syntax\n\t";
}
- OS << "\n\t.att_syntax\n" << (char)0; // null terminate string.
-}
-static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
- MachineModuleInfo *MMI, const MCAsmInfo *MAI,
- AsmPrinter *AP, uint64_t LocCookie,
- raw_ostream &OS) {
int CurVariant = -1; // The number of the {.|.|.} region we are in.
const char *LastEmitted = AsmStr; // One past the last character emitted.
unsigned NumOperands = MI->getNumOperands();
- int AsmPrinterVariant = MMI->getTarget().unqualifiedInlineAsmVariant();
- if (MAI->getEmitGNUAsmStartIndentationMarker())
+ int AsmPrinterVariant;
+ if (InputIsIntelDialect)
+ AsmPrinterVariant = 1; // X86MCAsmInfo.cpp's AsmWriterFlavorTy::Intel.
+ else
+ AsmPrinterVariant = MMI->getTarget().unqualifiedInlineAsmVariant();
+
+ // FIXME: Should this happen for `asm inteldialect` as well?
+ if (!InputIsIntelDialect && MAI->getEmitGNUAsmStartIndentationMarker())
OS << '\t';
while (*LastEmitted) {
@@ -340,8 +178,9 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
switch (*LastEmitted) {
default: Done = false; break;
case '$': // $$ -> $
- if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
- OS << '$';
+ if (!InputIsIntelDialect)
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ OS << '$';
++LastEmitted; // Consume second '$' character.
break;
case '(': // $( -> same as GCC's { character.
@@ -480,6 +319,8 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
}
}
}
+ if (InputIsIntelDialect)
+ OS << "\n\t.att_syntax";
OS << '\n' << (char)0; // null terminate string.
}
@@ -515,9 +356,8 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
// it.
uint64_t LocCookie = 0;
const MDNode *LocMD = nullptr;
- for (unsigned i = MI->getNumOperands(); i != 0; --i) {
- if (MI->getOperand(i-1).isMetadata() &&
- (LocMD = MI->getOperand(i-1).getMetadata()) &&
+ for (const MachineOperand &MO : llvm::reverse(MI->operands())) {
+ if (MO.isMetadata() && (LocMD = MO.getMetadata()) &&
LocMD->getNumOperands() != 0) {
if (const ConstantInt *CI =
mdconst::dyn_extract<ConstantInt>(LocMD->getOperand(0))) {
@@ -533,10 +373,7 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
raw_svector_ostream OS(StringData);
AsmPrinter *AP = const_cast<AsmPrinter*>(this);
- if (MI->getInlineAsmDialect() == InlineAsm::AD_ATT)
- EmitGCCInlineAsmStr(AsmStr, MI, MMI, MAI, AP, LocCookie, OS);
- else
- EmitMSInlineAsmStr(AsmStr, MI, MMI, MAI, AP, LocCookie, OS);
+ EmitInlineAsmStr(AsmStr, MI, MMI, MAI, AP, LocCookie, OS);
// Emit warnings if we use reserved registers on the clobber list, as
// that might lead to undefined behaviour.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index a36d2966d44a..9b73f0ab2f05 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -521,8 +521,8 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
}
// Construct a DIE for this scope.
-void DwarfCompileUnit::constructScopeDIE(
- LexicalScope *Scope, SmallVectorImpl<DIE *> &FinalChildren) {
+void DwarfCompileUnit::constructScopeDIE(LexicalScope *Scope,
+ DIE &ParentScopeDIE) {
if (!Scope || !Scope->getScopeNode())
return;
@@ -533,46 +533,27 @@ void DwarfCompileUnit::constructScopeDIE(
"constructSubprogramScopeDIE for non-inlined "
"subprograms");
- SmallVector<DIE *, 8> Children;
-
- // We try to create the scope DIE first, then the children DIEs. This will
- // avoid creating un-used children then removing them later when we find out
- // the scope DIE is null.
- DIE *ScopeDIE;
+ // Emit inlined subprograms.
if (Scope->getParent() && isa<DISubprogram>(DS)) {
- ScopeDIE = constructInlinedScopeDIE(Scope);
+ DIE *ScopeDIE = constructInlinedScopeDIE(Scope);
if (!ScopeDIE)
return;
- // We create children when the scope DIE is not null.
- createScopeChildrenDIE(Scope, Children);
- } else {
- // Early exit when we know the scope DIE is going to be null.
- if (DD->isLexicalScopeDIENull(Scope))
- return;
-
- bool HasNonScopeChildren = false;
- // We create children here when we know the scope DIE is not going to be
- // null and the children will be added to the scope DIE.
- createScopeChildrenDIE(Scope, Children, &HasNonScopeChildren);
-
- // If there are only other scopes as children, put them directly in the
- // parent instead, as this scope would serve no purpose.
- if (!HasNonScopeChildren) {
- FinalChildren.insert(FinalChildren.end(),
- std::make_move_iterator(Children.begin()),
- std::make_move_iterator(Children.end()));
- return;
- }
- ScopeDIE = constructLexicalScopeDIE(Scope);
- assert(ScopeDIE && "Scope DIE should not be null.");
+ ParentScopeDIE.addChild(ScopeDIE);
+ createAndAddScopeChildren(Scope, *ScopeDIE);
+ return;
}
- // Add children
- for (auto &I : Children)
- ScopeDIE->addChild(std::move(I));
+ // Early exit when we know the scope DIE is going to be null.
+ if (DD->isLexicalScopeDIENull(Scope))
+ return;
+
+ // Emit lexical blocks.
+ DIE *ScopeDIE = constructLexicalScopeDIE(Scope);
+ assert(ScopeDIE && "Scope DIE should not be null.");
- FinalChildren.push_back(std::move(ScopeDIE));
+ ParentScopeDIE.addChild(ScopeDIE);
+ createAndAddScopeChildren(Scope, *ScopeDIE);
}
void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,
@@ -1022,42 +1003,6 @@ sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) {
return Result;
}
-DIE *DwarfCompileUnit::createScopeChildrenDIE(LexicalScope *Scope,
- SmallVectorImpl<DIE *> &Children,
- bool *HasNonScopeChildren) {
- assert(Children.empty());
- DIE *ObjectPointer = nullptr;
-
- // Emit function arguments (order is significant).
- auto Vars = DU->getScopeVariables().lookup(Scope);
- for (auto &DV : Vars.Args)
- Children.push_back(constructVariableDIE(*DV.second, *Scope, ObjectPointer));
-
- // Emit local variables.
- auto Locals = sortLocalVars(Vars.Locals);
- for (DbgVariable *DV : Locals)
- Children.push_back(constructVariableDIE(*DV, *Scope, ObjectPointer));
-
- // Skip imported directives in gmlt-like data.
- if (!includeMinimalInlineScopes()) {
- // There is no need to emit empty lexical block DIE.
- for (const auto *IE : ImportedEntities[Scope->getScopeNode()])
- Children.push_back(
- constructImportedEntityDIE(cast<DIImportedEntity>(IE)));
- }
-
- if (HasNonScopeChildren)
- *HasNonScopeChildren = !Children.empty();
-
- for (DbgLabel *DL : DU->getScopeLabels().lookup(Scope))
- Children.push_back(constructLabelDIE(*DL, *Scope));
-
- for (LexicalScope *LS : Scope->getChildren())
- constructScopeDIE(LS, Children);
-
- return ObjectPointer;
-}
-
DIE &DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub,
LexicalScope *Scope) {
DIE &ScopeDIE = updateSubprogramScopeDIE(Sub);
@@ -1088,13 +1033,48 @@ DIE &DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub,
DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope,
DIE &ScopeDIE) {
- // We create children when the scope DIE is not null.
- SmallVector<DIE *, 8> Children;
- DIE *ObjectPointer = createScopeChildrenDIE(Scope, Children);
+ DIE *ObjectPointer = nullptr;
+
+ // Emit function arguments (order is significant).
+ auto Vars = DU->getScopeVariables().lookup(Scope);
+ for (auto &DV : Vars.Args)
+ ScopeDIE.addChild(constructVariableDIE(*DV.second, *Scope, ObjectPointer));
+
+ // Emit local variables.
+ auto Locals = sortLocalVars(Vars.Locals);
+ for (DbgVariable *DV : Locals)
+ ScopeDIE.addChild(constructVariableDIE(*DV, *Scope, ObjectPointer));
+
+ // Emit imported entities (skipped in gmlt-like data).
+ if (!includeMinimalInlineScopes()) {
+ for (const auto *IE : ImportedEntities[Scope->getScopeNode()])
+ ScopeDIE.addChild(constructImportedEntityDIE(cast<DIImportedEntity>(IE)));
+ }
+
+ // Emit labels.
+ for (DbgLabel *DL : DU->getScopeLabels().lookup(Scope))
+ ScopeDIE.addChild(constructLabelDIE(*DL, *Scope));
- // Add children
- for (auto &I : Children)
- ScopeDIE.addChild(std::move(I));
+ // Emit inner lexical scopes.
+ auto needToEmitLexicalScope = [this](LexicalScope *LS) {
+ if (isa<DISubprogram>(LS->getScopeNode()))
+ return true;
+ auto Vars = DU->getScopeVariables().lookup(LS);
+ if (!Vars.Args.empty() || !Vars.Locals.empty())
+ return true;
+ if (!includeMinimalInlineScopes() &&
+ !ImportedEntities[LS->getScopeNode()].empty())
+ return true;
+ return false;
+ };
+ for (LexicalScope *LS : Scope->getChildren()) {
+ // If the lexical block doesn't have non-scope children, skip
+ // its emission and put its children directly into the parent scope.
+ if (needToEmitLexicalScope(LS))
+ constructScopeDIE(LS, ScopeDIE);
+ else
+ createAndAddScopeChildren(LS, ScopeDIE);
+ }
return ObjectPointer;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 6e9261087686..fb03982b5e4a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -191,8 +191,7 @@ public:
/// variables.
DIE &updateSubprogramScopeDIE(const DISubprogram *SP);
- void constructScopeDIE(LexicalScope *Scope,
- SmallVectorImpl<DIE *> &FinalChildren);
+ void constructScopeDIE(LexicalScope *Scope, DIE &ParentScopeDIE);
/// A helper function to construct a RangeSpanList for a given
/// lexical scope.
@@ -220,11 +219,6 @@ public:
/// Construct a DIE for the given DbgLabel.
DIE *constructLabelDIE(DbgLabel &DL, const LexicalScope &Scope);
- /// A helper function to create children of a Scope DIE.
- DIE *createScopeChildrenDIE(LexicalScope *Scope,
- SmallVectorImpl<DIE *> &Children,
- bool *HasNonScopeChildren = nullptr);
-
void createBaseTypeDIEs();
/// Construct a DIE for this subprogram scope.
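The DwarfCompileUnit rework above replaces the old two-pass scheme (build a child list, then decide whether the scope DIE is worth creating) with a single recursion: a lexical scope with no non-scope children is never materialized, and its children are attached to the nearest emitted ancestor. A minimal sketch of that recursion on hypothetical stand-in types (Scope and DIE below are not the LLVM classes, and the real needToEmitLexicalScope also keeps subprogram scopes and scopes with imported entities):

#include <string>
#include <utility>
#include <vector>

struct DIE {
  std::string Name;
  std::vector<DIE> Children;
};

struct Scope {
  std::vector<std::string> Vars; // stand-in for variables, labels, imports
  std::vector<Scope> Children;   // nested lexical scopes
};

static bool needToEmit(const Scope &S) { return !S.Vars.empty(); }

static void addScopeChildren(const Scope &S, DIE &Parent) {
  for (const std::string &V : S.Vars)
    Parent.Children.push_back(DIE{V, {}});
  for (const Scope &Child : S.Children) {
    if (needToEmit(Child)) {
      DIE Block{"DW_TAG_lexical_block", {}};
      addScopeChildren(Child, Block);
      Parent.Children.push_back(std::move(Block));
    } else {
      // Empty block: hoist its children directly into Parent.
      addScopeChildren(Child, Parent);
    }
  }
}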
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 150f19324834..39f40b172c1b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -162,9 +162,7 @@ bool EHStreamer::callToNoUnwindFunction(const MachineInstr *MI) {
bool MarkedNoUnwind = false;
bool SawFunc = false;
- for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
- const MachineOperand &MO = MI->getOperand(I);
-
+ for (const MachineOperand &MO : MI->operands()) {
if (!MO.isGlobal()) continue;
const Function *F = dyn_cast<Function>(MO.getGlobal());
@@ -386,8 +384,8 @@ MCSymbol *EHStreamer::emitExceptionTable() {
SmallVector<const LandingPadInfo *, 64> LandingPads;
LandingPads.reserve(PadInfos.size());
- for (unsigned i = 0, N = PadInfos.size(); i != N; ++i)
- LandingPads.push_back(&PadInfos[i]);
+ for (const LandingPadInfo &LPI : PadInfos)
+ LandingPads.push_back(&LPI);
// Order landing pads lexicographically by type id.
llvm::sort(LandingPads, [](const LandingPadInfo *L, const LandingPadInfo *R) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
index 5ac8f49a9522..64dadc82b48b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
@@ -1013,8 +1013,8 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// If this is a large problem, avoid visiting the same basic blocks
// multiple times.
if (MergePotentials.size() == TailMergeThreshold)
- for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
- TriedMerging.insert(MergePotentials[i].getBlock());
+ for (const MergePotentialsElt &Elt : MergePotentials)
+ TriedMerging.insert(Elt.getBlock());
// See if we can do any tail merging on those.
if (MergePotentials.size() >= 2)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp
index 50825ccf9bac..eda0f37fdeb7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -513,9 +513,7 @@ bool BranchRelaxation::relaxBranchInstructions() {
// Relaxing branches involves creating new basic blocks, so re-eval
// end() for termination.
- for (MachineFunction::iterator I = MF->begin(); I != MF->end(); ++I) {
- MachineBasicBlock &MBB = *I;
-
+ for (MachineBasicBlock &MBB : *MF) {
// Empty block?
MachineBasicBlock::iterator Last = MBB.getLastNonDebugInstr();
if (Last == MBB.end())
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
index e0e2db9f4725..bbdd8aab502e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
@@ -58,8 +58,10 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeLiveVariablesPass(Registry);
initializeLocalStackSlotPassPass(Registry);
initializeLowerIntrinsicsPass(Registry);
+ initializeMIRAddFSDiscriminatorsPass(Registry);
initializeMIRCanonicalizerPass(Registry);
initializeMIRNamerPass(Registry);
+ initializeMIRProfileLoaderPassPass(Registry);
initializeMachineBlockFrequencyInfoPass(Registry);
initializeMachineBlockPlacementPass(Registry);
initializeMachineBlockPlacementStatsPass(Registry);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
index a1ff02178ffa..3bed81d5841d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
@@ -90,7 +90,7 @@ CGOPT(bool, EnableAddrsig)
CGOPT(bool, EmitCallSiteInfo)
CGOPT(bool, EnableMachineFunctionSplitter)
CGOPT(bool, EnableDebugEntryValues)
-CGOPT(bool, ValueTrackingVariableLocations)
+CGOPT_EXP(bool, ValueTrackingVariableLocations)
CGOPT(bool, ForceDwarfFrameSection)
CGOPT(bool, XRayOmitFunctionIndex)
CGOPT(bool, DebugStrictDwarf)
@@ -534,12 +534,17 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
Options.EmitAddrsig = getEnableAddrsig();
Options.EmitCallSiteInfo = getEmitCallSiteInfo();
Options.EnableDebugEntryValues = getEnableDebugEntryValues();
- Options.ValueTrackingVariableLocations = getValueTrackingVariableLocations();
Options.ForceDwarfFrameSection = getForceDwarfFrameSection();
Options.XRayOmitFunctionIndex = getXRayOmitFunctionIndex();
Options.DebugStrictDwarf = getDebugStrictDwarf();
Options.LoopAlignment = getAlignLoops();
+ if (auto Opt = getExplicitValueTrackingVariableLocations())
+ Options.ValueTrackingVariableLocations = *Opt;
+ else
+ Options.ValueTrackingVariableLocations =
+ getDefaultValueTrackingVariableLocations(TheTriple);
+
Options.MCOptions = mc::InitMCTargetOptionsFromFlags();
Options.ThreadModel = getThreadModel();
@@ -692,3 +697,9 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
for (Function &F : M)
setFunctionAttributes(CPU, Features, F);
}
+
+bool codegen::getDefaultValueTrackingVariableLocations(const llvm::Triple &T) {
+ if (T.getArch() == llvm::Triple::x86_64)
+ return true;
+ return false;
+}
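The CommandFlags.cpp change above switches ValueTrackingVariableLocations from a plain flag to an "explicit value wins, otherwise derive a default from the triple" scheme, defaulting to true only on x86_64. A generic sketch of that idiom, with std::optional standing in for the CGOPT_EXP-generated accessor (helper names below are made up for illustration):

#include <optional>
#include <string>

static bool defaultValueTracking(const std::string &Arch) {
  return Arch == "x86_64"; // mirrors the x86_64-only default above
}

static bool resolveValueTracking(std::optional<bool> Explicit,
                                 const std::string &Arch) {
  // An explicitly requested value always wins over the derived default.
  return Explicit.has_value() ? *Explicit : defaultValueTracking(Arch);
}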
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 981f5973fee8..4e98d49206b5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -370,9 +370,7 @@ CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin,
// Handle cases in which this instruction defines NewReg.
MachineInstr *MI = RefOper->getParent();
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &CheckOper = MI->getOperand(i);
-
+ for (const MachineOperand &CheckOper : MI->operands()) {
if (CheckOper.isRegMask() && CheckOper.clobbersPhysReg(NewReg))
return true;
@@ -462,11 +460,10 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits,
// Find the node at the bottom of the critical path.
const SUnit *Max = nullptr;
- for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
- const SUnit *SU = &SUnits[i];
- MISUnitMap[SU->getInstr()] = SU;
- if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
- Max = SU;
+ for (const SUnit &SU : SUnits) {
+ MISUnitMap[SU.getInstr()] = &SU;
+ if (!Max || SU.getDepth() + SU.Latency > Max->getDepth() + Max->Latency)
+ Max = &SU;
}
assert(Max && "Failed to find bottom of the critical path");
@@ -621,8 +618,7 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits,
// is invalid. If the instruction defines other registers,
// save a list of them so that we don't pick a new register
// that overlaps any of them.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg()) continue;
Register Reg = MO.getReg();
if (Reg == 0) continue;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index c6c0b79cd7e7..0bb186a02416 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -76,8 +76,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
return false;
// Examine each operand.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (MO.isReg() && MO.isDef()) {
Register Reg = MO.getReg();
if (Register::isPhysicalRegister(Reg)) {
@@ -87,7 +86,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
} else {
if (MO.isDead()) {
#ifndef NDEBUG
- // Sanity check on uses of this dead register. All of them should be
+ // Basic check on the register. All of them should be
// 'undef'.
for (auto &U : MRI->use_nodbg_operands(Reg))
assert(U.isUndef() && "'Undef' use on a 'dead' register is found!");
@@ -152,8 +151,7 @@ bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) {
}
// Record the physreg defs.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.isDef()) {
Register Reg = MO.getReg();
if (Register::isPhysicalRegister(Reg)) {
@@ -171,8 +169,7 @@ bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) {
}
// Record the physreg uses, after the defs, in case a physreg is
// both defined and used in the same instruction.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.isUse()) {
Register Reg = MO.getReg();
if (Register::isPhysicalRegister(Reg)) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 3a52959d54bf..755b3b844570 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Casting.h"
@@ -3732,8 +3733,7 @@ void CombinerHelper::applyExtendThroughPhis(MachineInstr &MI,
Builder.setInstrAndDebugLoc(MI);
auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
NewPhi.addDef(DstReg);
- for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); ++SrcIdx) {
- auto &MO = MI.getOperand(SrcIdx);
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
if (!MO.isReg()) {
NewPhi.addMBB(MO.getMBB());
continue;
@@ -3825,8 +3825,7 @@ bool CombinerHelper::matchExtractAllEltsFromBuildVector(
unsigned NumElts = DstTy.getNumElements();
SmallBitVector ExtractedElts(NumElts);
- for (auto &II : make_range(MRI.use_instr_nodbg_begin(DstReg),
- MRI.use_instr_nodbg_end())) {
+ for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
return false;
auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
@@ -3868,6 +3867,51 @@ void CombinerHelper::applyBuildFnNoErase(
MatchInfo(Builder);
}
+bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_OR);
+
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ unsigned BitWidth = Ty.getScalarSizeInBits();
+
+ Register ShlSrc, ShlAmt, LShrSrc, LShrAmt;
+ unsigned FshOpc = 0;
+
+ // Match (or (shl x, amt), (lshr y, sub(bw, amt))).
+ if (mi_match(
+ Dst, MRI,
+ // m_GOr() handles the commuted version as well.
+ m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
+ m_GLShr(m_Reg(LShrSrc), m_GSub(m_SpecificICstOrSplat(BitWidth),
+ m_Reg(LShrAmt)))))) {
+ FshOpc = TargetOpcode::G_FSHL;
+
+ // Match (or (shl x, sub(bw, amt)), (lshr y, amt)).
+ } else if (mi_match(Dst, MRI,
+ m_GOr(m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)),
+ m_GShl(m_Reg(ShlSrc),
+ m_GSub(m_SpecificICstOrSplat(BitWidth),
+ m_Reg(ShlAmt)))))) {
+ FshOpc = TargetOpcode::G_FSHR;
+
+ } else {
+ return false;
+ }
+
+ if (ShlAmt != LShrAmt)
+ return false;
+
+ LLT AmtTy = MRI.getType(ShlAmt);
+ if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, ShlAmt});
+ };
+ return true;
+}
+
/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
@@ -4499,20 +4543,9 @@ bool CombinerHelper::matchNarrowBinopFeedingAnd(
bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) {
unsigned Opc = MI.getOpcode();
assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
- // Check for a constant 2 or a splat of 2 on the RHS.
- auto RHS = MI.getOperand(3).getReg();
- bool IsVector = MRI.getType(RHS).isVector();
- if (!IsVector && !mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(2)))
- return false;
- if (IsVector) {
- // FIXME: There's no mi_match pattern for this yet.
- auto *RHSDef = getDefIgnoringCopies(RHS, MRI);
- if (!RHSDef)
- return false;
- auto Splat = getBuildVectorConstantSplat(*RHSDef, MRI);
- if (!Splat || *Splat != 2)
- return false;
- }
+
+ if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
+ return false;
MatchInfo = [=, &MI](MachineIRBuilder &B) {
Observer.changingInstr(MI);
@@ -4760,6 +4793,556 @@ bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI,
return true;
}
+/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
+/// due to global flags or MachineInstr flags.
+static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
+ if (MI.getOpcode() != TargetOpcode::G_FMUL)
+ return false;
+ return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
+}
+
+static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
+ const MachineRegisterInfo &MRI) {
+ return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
+ MRI.use_instr_nodbg_end()) >
+ std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
+ MRI.use_instr_nodbg_end());
+}
+
+bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI,
+ bool &AllowFusionGlobally,
+ bool &HasFMAD, bool &Aggressive,
+ bool CanReassociate) {
+
+ auto *MF = MI.getMF();
+ const auto &TLI = *MF->getSubtarget().getTargetLowering();
+ const TargetOptions &Options = MF->getTarget().Options;
+ LLT DstType = MRI.getType(MI.getOperand(0).getReg());
+
+ if (CanReassociate &&
+ !(Options.UnsafeFPMath || MI.getFlag(MachineInstr::MIFlag::FmReassoc)))
+ return false;
+
+ // Floating-point multiply-add with intermediate rounding.
+ HasFMAD = (LI && TLI.isFMADLegal(MI, DstType));
+ // Floating-point multiply-add without intermediate rounding.
+ bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
+ isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
+ // No valid opcode, do not combine.
+ if (!HasFMAD && !HasFMA)
+ return false;
+
+ AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Options.UnsafeFPMath || HasFMAD;
+ // If the addition is not contractable, do not combine.
+ if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
+ return false;
+
+ Aggressive = TLI.enableAggressiveFMAFusion(DstType);
+ return true;
+}
+
+bool CombinerHelper::matchCombineFAddFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FADD);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg());
+ MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg());
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) &&
+ isContractableFMul(*RHS, AllowFusionGlobally)) {
+ if (hasMoreUses(*LHS, *RHS, MRI))
+ std::swap(LHS, RHS);
+ }
+
+ // fold (fadd (fmul x, y), z) -> (fma x, y, z)
+ if (isContractableFMul(*LHS, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(LHS->getOperand(0).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {LHS->getOperand(1).getReg(), LHS->getOperand(2).getReg(),
+ RHS->getOperand(0).getReg()});
+ };
+ return true;
+ }
+
+ // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
+ if (isContractableFMul(*RHS, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(RHS->getOperand(0).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {RHS->getOperand(1).getReg(), RHS->getOperand(2).getReg(),
+ LHS->getOperand(0).getReg()});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FADD);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
+ MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg());
+ MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg());
+ LLT DstType = MRI.getType(MI.getOperand(0).getReg());
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) &&
+ isContractableFMul(*RHS, AllowFusionGlobally)) {
+ if (hasMoreUses(*LHS, *RHS, MRI))
+ std::swap(LHS, RHS);
+ }
+
+ // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
+ MachineInstr *FpExtSrc;
+ if (mi_match(LHS->getOperand(0).getReg(), MRI,
+ m_GFPExt(m_MInstr(FpExtSrc))) &&
+ isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
+ auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
+ B.buildInstr(
+ PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {FpExtX.getReg(0), FpExtY.getReg(0), RHS->getOperand(0).getReg()});
+ };
+ return true;
+ }
+
+ // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
+ // Note: Commutes FADD operands.
+ if (mi_match(RHS->getOperand(0).getReg(), MRI,
+ m_GFPExt(m_MInstr(FpExtSrc))) &&
+ isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
+ auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
+ B.buildInstr(
+ PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {FpExtX.getReg(0), FpExtY.getReg(0), LHS->getOperand(0).getReg()});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FADD);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
+ return false;
+
+ MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg());
+ MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg());
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) &&
+ isContractableFMul(*RHS, AllowFusionGlobally)) {
+ if (hasMoreUses(*LHS, *RHS, MRI))
+ std::swap(LHS, RHS);
+ }
+
+ MachineInstr *FMA = nullptr;
+ Register Z;
+ // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
+ if (LHS->getOpcode() == PreferredFusedOpcode &&
+ (MRI.getVRegDef(LHS->getOperand(3).getReg())->getOpcode() ==
+ TargetOpcode::G_FMUL) &&
+ MRI.hasOneNonDBGUse(LHS->getOperand(0).getReg()) &&
+ MRI.hasOneNonDBGUse(LHS->getOperand(3).getReg())) {
+ FMA = LHS;
+ Z = RHS->getOperand(0).getReg();
+ }
+ // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
+ else if (RHS->getOpcode() == PreferredFusedOpcode &&
+ (MRI.getVRegDef(RHS->getOperand(3).getReg())->getOpcode() ==
+ TargetOpcode::G_FMUL) &&
+ MRI.hasOneNonDBGUse(RHS->getOperand(0).getReg()) &&
+ MRI.hasOneNonDBGUse(RHS->getOperand(3).getReg())) {
+ Z = LHS->getOperand(0).getReg();
+ FMA = RHS;
+ }
+
+ if (FMA) {
+ MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
+ Register X = FMA->getOperand(1).getReg();
+ Register Y = FMA->getOperand(2).getReg();
+ Register U = FMulMI->getOperand(1).getReg();
+ Register V = FMulMI->getOperand(2).getReg();
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
+ B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {X, Y, InnerFMA});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FADD);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ if (!Aggressive)
+ return false;
+
+ const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
+ LLT DstType = MRI.getType(MI.getOperand(0).getReg());
+ MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg());
+ MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg());
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) &&
+ isContractableFMul(*RHS, AllowFusionGlobally)) {
+ if (hasMoreUses(*LHS, *RHS, MRI))
+ std::swap(LHS, RHS);
+ }
+
+ // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
+ auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
+ Register Y, MachineIRBuilder &B) {
+ Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
+ Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
+ Register InnerFMA =
+ B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
+ .getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {X, Y, InnerFMA});
+ };
+
+ MachineInstr *FMulMI, *FMAMI;
+ // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
+ // -> (fma x, y, (fma (fpext u), (fpext v), z))
+ if (LHS->getOpcode() == PreferredFusedOpcode &&
+ mi_match(LHS->getOperand(3).getReg(), MRI, m_GFPExt(m_MInstr(FMulMI))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FMulMI->getOperand(0).getReg()))) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ buildMatchInfo(FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(),
+ RHS->getOperand(0).getReg(), LHS->getOperand(1).getReg(),
+ LHS->getOperand(2).getReg(), B);
+ };
+ return true;
+ }
+
+ // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
+ // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (mi_match(LHS->getOperand(0).getReg(), MRI, m_GFPExt(m_MInstr(FMAMI))) &&
+ FMAMI->getOpcode() == PreferredFusedOpcode) {
+ MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
+ if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FMAMI->getOperand(0).getReg()))) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ Register X = FMAMI->getOperand(1).getReg();
+ Register Y = FMAMI->getOperand(2).getReg();
+ X = B.buildFPExt(DstType, X).getReg(0);
+ Y = B.buildFPExt(DstType, Y).getReg(0);
+ buildMatchInfo(FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(),
+ RHS->getOperand(0).getReg(), X, Y, B);
+ };
+
+ return true;
+ }
+ }
+
+ // fold (fadd z, (fma x, y, (fpext (fmul u, v)))
+ // -> (fma x, y, (fma (fpext u), (fpext v), z))
+ if (RHS->getOpcode() == PreferredFusedOpcode &&
+ mi_match(RHS->getOperand(3).getReg(), MRI, m_GFPExt(m_MInstr(FMulMI))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FMulMI->getOperand(0).getReg()))) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ buildMatchInfo(FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(),
+ LHS->getOperand(0).getReg(), RHS->getOperand(1).getReg(),
+ RHS->getOperand(2).getReg(), B);
+ };
+ return true;
+ }
+
+ // fold (fadd z, (fpext (fma x, y, (fmul u, v)))
+ // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (mi_match(RHS->getOperand(0).getReg(), MRI, m_GFPExt(m_MInstr(FMAMI))) &&
+ FMAMI->getOpcode() == PreferredFusedOpcode) {
+ MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
+ if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FMAMI->getOperand(0).getReg()))) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ Register X = FMAMI->getOperand(1).getReg();
+ Register Y = FMAMI->getOperand(2).getReg();
+ X = B.buildFPExt(DstType, X).getReg(0);
+ Y = B.buildFPExt(DstType, Y).getReg(0);
+ buildMatchInfo(FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(),
+ LHS->getOperand(0).getReg(), X, Y, B);
+ };
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FSUB);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg());
+ MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg());
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ int FirstMulHasFewerUses = true;
+ if (isContractableFMul(*LHS, AllowFusionGlobally) &&
+ isContractableFMul(*RHS, AllowFusionGlobally) &&
+ hasMoreUses(*LHS, *RHS, MRI))
+ FirstMulHasFewerUses = false;
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
+ if (FirstMulHasFewerUses &&
+ (isContractableFMul(*LHS, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(LHS->getOperand(0).getReg())))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register NegZ = B.buildFNeg(DstTy, RHS->getOperand(0).getReg()).getReg(0);
+ B.buildInstr(
+ PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {LHS->getOperand(1).getReg(), LHS->getOperand(2).getReg(), NegZ});
+ };
+ return true;
+ }
+ // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
+ else if ((isContractableFMul(*RHS, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(RHS->getOperand(0).getReg())))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register NegY = B.buildFNeg(DstTy, RHS->getOperand(1).getReg()).getReg(0);
+ B.buildInstr(
+ PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {NegY, RHS->getOperand(2).getReg(), LHS->getOperand(0).getReg()});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FSUB);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ Register LHSReg = MI.getOperand(1).getReg();
+ Register RHSReg = MI.getOperand(2).getReg();
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ MachineInstr *FMulMI;
+ // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
+ if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
+ (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
+ MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally)) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register NegX =
+ B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
+ Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {NegX, FMulMI->getOperand(2).getReg(), NegZ});
+ };
+ return true;
+ }
+
+ // fold (fsub x, (fneg (fmul y, z))) -> (fma y, z, x)
+ if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
+ (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
+ MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally)) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(), LHSReg});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFSubFpExtFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FSUB);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ Register LHSReg = MI.getOperand(1).getReg();
+ Register RHSReg = MI.getOperand(2).getReg();
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ MachineInstr *FMulMI;
+ // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
+ if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register FpExtX =
+ B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
+ Register FpExtY =
+ B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
+ Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {FpExtX, FpExtY, NegZ});
+ };
+ return true;
+ }
+
+ // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
+ if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register FpExtY =
+ B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
+ Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
+ Register FpExtZ =
+ B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {NegY, FpExtZ, LHSReg});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FSUB);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ Register LHSReg = MI.getOperand(1).getReg();
+ Register RHSReg = MI.getOperand(2).getReg();
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
+ MachineIRBuilder &B) {
+ Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
+ Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
+ };
+
+ MachineInstr *FMulMI;
+ // fold (fsub (fpext (fneg (fmul x, y))), z) ->
+ // (fneg (fma (fpext x), (fpext y), z))
+ // fold (fsub (fneg (fpext (fmul x, y))), z) ->
+ // (fneg (fma (fpext x), (fpext y), z))
+ if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
+ mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
+ MRI.getType(FMulMI->getOperand(0).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register FMAReg = MRI.createGenericVirtualRegister(DstTy);
+ buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(), RHSReg, B);
+ B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
+ };
+ return true;
+ }
+
+ // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
+ // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
+ if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
+ mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
+ MRI.getType(FMulMI->getOperand(0).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(), LHSReg, B);
+ };
+ return true;
+ }
+
+ return false;
+}
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
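Most of the new CombinerHelper code above fuses patterns such as (fadd (fmul x, y), z) into a single fma/fmad, and canCombineFMadOrFMA gates this on contract/fast-math flags because fusing drops the intermediate rounding of the multiply and can change results. A standalone C++17 illustration of that effect using std::fma (not LLVM code; hex float literals and strict FP evaluation assumed):

#include <cmath>
#include <cstdio>

int main() {
  double X = 1.0 + 0x1p-27, Y = 1.0 - 0x1p-27, Z = -1.0;
  volatile double Product = X * Y;  // X*Y = 1 - 2^-54 rounds to exactly 1.0
  double Separate = Product + Z;    // 1.0 + (-1.0) == 0.0
  double Fused = std::fma(X, Y, Z); // single rounding at the end: -0x1p-54
  std::printf("separate=%a fused=%a\n", Separate, Fused);
  return 0;
}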
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index c74bec7dfc0d..e09cd26eb0c1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -585,8 +585,8 @@ simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
// FIXME: What does the original arg index mean here?
SmallVector<CallLowering::ArgInfo, 3> Args;
- for (unsigned i = 1; i < MI.getNumOperands(); i++)
- Args.push_back({MI.getOperand(i).getReg(), OpType, 0});
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
+ Args.push_back({MO.getReg(), OpType, 0});
return createLibcall(MIRBuilder, Libcall,
{MI.getOperand(0).getReg(), OpType, 0}, Args);
}
@@ -1500,8 +1500,8 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
// Decompose the original operands if they don't evenly divide.
- for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
- Register SrcReg = MI.getOperand(I).getReg();
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
+ Register SrcReg = MO.getReg();
if (GCD == SrcSize) {
Unmerges.push_back(SrcReg);
} else {
@@ -4037,8 +4037,8 @@ LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
// Break into a common type
SmallVector<Register, 16> Parts;
- for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
- extractGCDType(Parts, GCDTy, MI.getOperand(I).getReg());
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
+ extractGCDType(Parts, GCDTy, MO.getReg());
// Build the requested new merge, padding with undef.
LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
@@ -7782,7 +7782,6 @@ LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
// of that value loaded. This can result in a sequence of loads and stores
// mixed types, depending on what the target specifies as good types to use.
unsigned CurrOffset = 0;
- LLT PtrTy = MRI.getType(Src);
unsigned Size = KnownLen;
for (auto CopyTy : MemOps) {
// Issuing an unaligned load / store pair that overlaps with the previous
@@ -7800,15 +7799,19 @@ LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
Register LoadPtr = Src;
Register Offset;
if (CurrOffset != 0) {
- Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
+ LLT SrcTy = MRI.getType(Src);
+ Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
.getReg(0);
- LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
+ LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
}
auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
// Create the store.
- Register StorePtr =
- CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
+ Register StorePtr = Dst;
+ if (CurrOffset != 0) {
+ LLT DstTy = MRI.getType(Dst);
+ StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
+ }
MIB.buildStore(LdVal, StorePtr, *StoreMMO);
CurrOffset += CopyTy.getSizeInBytes();
Size -= CopyTy.getSizeInBytes();
@@ -7885,7 +7888,6 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
// Apart from that, this loop is pretty much doing the same thing as the
// memcpy codegen function.
unsigned CurrOffset = 0;
- LLT PtrTy = MRI.getType(Src);
SmallVector<Register, 16> LoadVals;
for (auto CopyTy : MemOps) {
// Construct MMO for the load.
@@ -7895,9 +7897,10 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
// Create the load.
Register LoadPtr = Src;
if (CurrOffset != 0) {
+ LLT SrcTy = MRI.getType(Src);
auto Offset =
- MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
- LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
+ MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
+ LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
}
LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
CurrOffset += CopyTy.getSizeInBytes();
@@ -7912,9 +7915,10 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
Register StorePtr = Dst;
if (CurrOffset != 0) {
+ LLT DstTy = MRI.getType(Dst);
auto Offset =
- MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
- StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
+ MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
+ StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
}
MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
CurrOffset += CopyTy.getSizeInBytes();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index 1a2102e3ef21..650500c7eb31 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -123,7 +123,7 @@ const RegisterBank *RegisterBankInfo::getRegBankFromConstraints(
Register Reg = MI.getOperand(OpIdx).getReg();
const RegisterBank &RegBank = getRegBankFromRegClass(*RC, MRI.getType(Reg));
- // Sanity check that the target properly implemented getRegBankFromRegClass.
+ // Check that the target properly implemented getRegBankFromRegClass.
assert(RegBank.covers(*RC) &&
"The mapping of the register bank does not make sense");
return &RegBank;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 1a440c064a59..b0b84763e922 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -834,10 +834,9 @@ bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
case TargetOpcode::G_BUILD_VECTOR: {
// TODO: Probably should have a recursion depth guard since you could have
// bitcasted vector elements.
- for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
- if (!isKnownToBeAPowerOfTwo(MI.getOperand(I).getReg(), MRI, KB))
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
+ if (!isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB))
return false;
- }
return true;
}
@@ -845,8 +844,8 @@ bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
// Only handle constants since we would need to know if number of leading
// zeros is greater than the truncation amount.
const unsigned BitWidth = Ty.getScalarSizeInBits();
- for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
- auto Const = getIConstantVRegVal(MI.getOperand(I).getReg(), MRI);
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
+ auto Const = getIConstantVRegVal(MO.getReg(), MRI);
if (!Const || !Const->zextOrTrunc(BitWidth).isPowerOf2())
return false;
}
@@ -1031,16 +1030,22 @@ Optional<ValueAndVReg> getAnyConstantSplat(Register VReg,
return SplatValAndReg;
}
-bool isBuildVectorConstantSplat(const MachineInstr &MI,
- const MachineRegisterInfo &MRI,
- int64_t SplatValue, bool AllowUndef) {
- if (auto SplatValAndReg =
- getAnyConstantSplat(MI.getOperand(0).getReg(), MRI, AllowUndef))
+} // end anonymous namespace
+
+bool llvm::isBuildVectorConstantSplat(const Register Reg,
+ const MachineRegisterInfo &MRI,
+ int64_t SplatValue, bool AllowUndef) {
+ if (auto SplatValAndReg = getAnyConstantSplat(Reg, MRI, AllowUndef))
return mi_match(SplatValAndReg->VReg, MRI, m_SpecificICst(SplatValue));
return false;
}
-} // end anonymous namespace
+bool llvm::isBuildVectorConstantSplat(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ int64_t SplatValue, bool AllowUndef) {
+ return isBuildVectorConstantSplat(MI.getOperand(0).getReg(), MRI, SplatValue,
+ AllowUndef);
+}
Optional<int64_t>
llvm::getBuildVectorConstantSplat(const MachineInstr &MI,
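The Utils.cpp hunks above swap index-based operand loops for llvm::drop_begin, which adapts a range so iteration starts after its first N elements (N defaults to 1, skipping the def operand here). A minimal standalone sketch of the idiom, not part of this patch, exercised on a plain std::vector instead of real MachineOperands and assuming only LLVM's ADT headers; the helper name is made up for illustration:

#include "llvm/ADT/STLExtras.h"
#include <cstdio>
#include <vector>

// Skip element 0 (standing in for the instruction's def operand) and test
// that every remaining "source operand" value is a power of two.
static bool allTailElementsArePowersOfTwo(const std::vector<unsigned> &Ops) {
  for (unsigned V : llvm::drop_begin(Ops))
    if (V == 0 || (V & (V - 1)) != 0)
      return false;
  return true;
}

int main() {
  std::vector<unsigned> BuildVector = {99 /* def */, 4, 8, 16};
  std::printf("%d\n", allTailElementsArePowersOfTwo(BuildVector)); // prints 1
}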
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
index 6c1ce4c1efb0..bbd9006a5d8c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -399,8 +399,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
// having a single global, but is aggressive enough for any other case.
if (GlobalMergeIgnoreSingleUse) {
BitVector AllGlobals(Globals.size());
- for (size_t i = 0, e = UsedGlobalSets.size(); i != e; ++i) {
- const UsedGlobalSet &UGS = UsedGlobalSets[e - i - 1];
+ for (const UsedGlobalSet &UGS : llvm::reverse(UsedGlobalSets)) {
if (UGS.UsageCount == 0)
continue;
if (UGS.Globals.count() > 1)
@@ -418,8 +417,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
BitVector PickedGlobals(Globals.size());
bool Changed = false;
- for (size_t i = 0, e = UsedGlobalSets.size(); i != e; ++i) {
- const UsedGlobalSet &UGS = UsedGlobalSets[e - i - 1];
+ for (const UsedGlobalSet &UGS : llvm::reverse(UsedGlobalSets)) {
if (UGS.UsageCount == 0)
continue;
if (PickedGlobals.anyCommon(UGS.Globals))
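The GlobalMerge.cpp hunks above replace the hand-written back-to-front index arithmetic (UsedGlobalSets[e - i - 1]) with llvm::reverse. A small sketch of the same rewrite over a plain container, assuming only llvm/ADT/STLExtras.h:

#include "llvm/ADT/STLExtras.h"
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> UsageCounts = {3, 0, 7, 1};

  // Before: for (size_t i = 0, e = v.size(); i != e; ++i) use v[e - i - 1];
  // After: the range adapter yields the same elements back-to-front with no
  // index arithmetic.
  for (int Count : llvm::reverse(UsageCounts)) {
    if (Count == 0)
      continue; // mirrors the "UsageCount == 0" skip in doMerge().
    std::printf("%d ", Count); // prints "1 7 3"
  }
  std::printf("\n");
}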
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
index e4606daba352..2d38a44d5a33 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
@@ -260,10 +260,12 @@ bool IndirectBrExpandPass::runOnFunction(Function &F) {
if (DTU) {
// If there were multiple indirectbr's, they may have common successors,
// but in the dominator tree, we only track unique edges.
- SmallPtrSet<BasicBlock *, 8> UniqueSuccessors(BBs.begin(), BBs.end());
- Updates.reserve(Updates.size() + UniqueSuccessors.size());
- for (BasicBlock *BB : UniqueSuccessors)
- Updates.push_back({DominatorTree::Insert, SwitchBB, BB});
+ SmallPtrSet<BasicBlock *, 8> UniqueSuccessors;
+ Updates.reserve(Updates.size() + BBs.size());
+ for (BasicBlock *BB : BBs) {
+ if (UniqueSuccessors.insert(BB).second)
+ Updates.push_back({DominatorTree::Insert, SwitchBB, BB});
+ }
DTU->applyUpdates(Updates);
}
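The IndirectBrExpandPass.cpp hunk above stops pre-building a SmallPtrSet from BBs (which would then be iterated in set order) and instead walks BBs in their original order, using insert(...).second to act only on the first occurrence of each block. A standalone sketch of that pattern, with plain pointers standing in for BasicBlock*:

#include "llvm/ADT/SmallPtrSet.h"
#include <cstdio>
#include <vector>

int main() {
  int A = 0, B = 0, C = 0;
  std::vector<int *> Succs = {&A, &B, &A, &C, &B};

  llvm::SmallPtrSet<int *, 8> Seen;
  for (int *S : Succs)
    if (Seen.insert(S).second)                // true only on first insertion
      std::printf("unique successor %p\n", (void *)S);
  // Emits A, B, C exactly once each, in first-seen order.
}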
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
index 64e1f4351456..fc5ac45752ca 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -274,11 +274,9 @@ static Register isFullCopyOf(const MachineInstr &MI, Register Reg) {
}
static void getVDefInterval(const MachineInstr &MI, LiveIntervals &LIS) {
- for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
- const MachineOperand &MO = MI.getOperand(I);
+ for (const MachineOperand &MO : MI.operands())
if (MO.isReg() && MO.isDef() && Register::isVirtualRegister(MO.getReg()))
LIS.getInterval(MO.getReg());
- }
}
/// isSnippet - Identify if a live interval is a snippet that should be spilled.
@@ -583,11 +581,9 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
if (!ParentVNI) {
LLVM_DEBUG(dbgs() << "\tadding <undef> flags: ");
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (MachineOperand &MO : MI.operands())
if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg())
MO.setIsUndef();
- }
LLVM_DEBUG(dbgs() << UseIdx << '\t' << MI);
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LatencyPriorityQueue.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
index c3e0553418a5..fab6b8d10a33 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -73,11 +73,9 @@ void LatencyPriorityQueue::push(SUnit *SU) {
// Look at all of the successors of this node. Count the number of nodes that
// this node is the sole unscheduled node for.
unsigned NumNodesBlocking = 0;
- for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- if (getSingleUnscheduledPred(I->getSUnit()) == SU)
+ for (const SDep &Succ : SU->Succs)
+ if (getSingleUnscheduledPred(Succ.getSUnit()) == SU)
++NumNodesBlocking;
- }
NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
Queue.push_back(SU);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
index a4eb3094612b..cf62b0e5d7e8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -544,8 +544,7 @@ public:
// Re-state the variable location: if there's no replacement then NewLoc
// is None and a $noreg DBG_VALUE will be created. Otherwise, a DBG_VALUE
// identifying the alternative location will be emitted.
- const DIExpression *Expr = ActiveVLocIt->second.Properties.DIExpr;
- DbgValueProperties Properties(Expr, false);
+ const DbgValueProperties &Properties = ActiveVLocIt->second.Properties;
PendingDbgValues.push_back(MTracker->emitLoc(NewLoc, Var, Properties));
// Update machine locations <=> variable locations maps. Defer updating
@@ -836,6 +835,15 @@ MachineInstrBuilder MLocTracker::emitLoc(Optional<LocIdx> MLoc,
unsigned Base = Spill.SpillBase;
MIB.addReg(Base);
MIB.addImm(0);
+
+ // Being on the stack makes this location indirect; if it was _already_
+ // indirect though, we need to add extra indirection. See this test for
+ // a scenario where this happens:
+ // llvm/test/DebugInfo/X86/spill-nontrivial-param.ll
+ if (Properties.Indirect) {
+ std::vector<uint64_t> Elts = {dwarf::DW_OP_deref};
+ Expr = DIExpression::append(Expr, Elts);
+ }
} else {
// This is a stack location with a weird subregister offset: emit an undef
// DBG_VALUE instead.
@@ -1288,6 +1296,24 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
} else if (MI.isMetaInstruction())
return;
+ // We always ignore SP defines on call instructions, they don't actually
+ // change the value of the stack pointer... except for win32's _chkstk. This
+ // is rare: filter quickly for the common case (no stack adjustments, not a
+ // call, etc). If it is a call that modifies SP, recognise the SP register
+ // defs.
+ bool CallChangesSP = false;
+ if (AdjustsStackInCalls && MI.isCall() && MI.getOperand(0).isSymbol() &&
+ !strcmp(MI.getOperand(0).getSymbolName(), StackProbeSymbolName.data()))
+ CallChangesSP = true;
+
+ // Test whether we should ignore a def of this register due to it being part
+ // of the stack pointer.
+ auto IgnoreSPAlias = [this, &MI, CallChangesSP](Register R) -> bool {
+ if (CallChangesSP)
+ return false;
+ return MI.isCall() && MTracker->SPAliases.count(R);
+ };
+
// Find the regs killed by MI, and find regmasks of preserved regs.
// Max out the number of statically allocated elements in `DeadRegs`, as this
// prevents fallback to std::set::count() operations.
@@ -1298,7 +1324,7 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
// Determine whether the operand is a register def.
if (MO.isReg() && MO.isDef() && MO.getReg() &&
Register::isPhysicalRegister(MO.getReg()) &&
- !(MI.isCall() && MTracker->SPAliases.count(MO.getReg()))) {
+ !IgnoreSPAlias(MO.getReg())) {
// Remove ranges of all aliased registers.
for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
// FIXME: Can we break out of this loop early if no insertion occurs?
@@ -1347,6 +1373,9 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
continue;
Register Reg = MTracker->LocIdxToLocID[L.Idx];
+ if (IgnoreSPAlias(Reg))
+ continue;
+
for (auto *MO : RegMaskPtrs)
if (MO->clobbersPhysReg(Reg))
TTracker->clobberMloc(L.Idx, MI.getIterator(), false);
@@ -1628,9 +1657,10 @@ bool InstrRefBasedLDV::transferRegisterCopy(MachineInstr &MI) {
/// fragments of that DILocalVariable which overlap. This reduces work during
/// the data-flow stage from "Find any overlapping fragments" to "Check if the
/// known-to-overlap fragments are present".
-/// \param MI A previously unprocessed DEBUG_VALUE instruction to analyze for
+/// \param MI A previously unprocessed debug instruction to analyze for
/// fragment usage.
void InstrRefBasedLDV::accumulateFragmentMap(MachineInstr &MI) {
+ assert(MI.isDebugValue() || MI.isDebugRef());
DebugVariable MIVar(MI.getDebugVariable(), MI.getDebugExpression(),
MI.getDebugLoc()->getInlinedAt());
FragmentInfo ThisFragment = MIVar.getFragmentOrDefault();
@@ -1732,7 +1762,7 @@ void InstrRefBasedLDV::produceMLocTransferFunction(
for (auto &MI : MBB) {
process(MI);
// Also accumulate fragment map.
- if (MI.isDebugValue())
+ if (MI.isDebugValue() || MI.isDebugRef())
accumulateFragmentMap(MI);
// Create a map from the instruction number (if present) to the
@@ -2322,15 +2352,8 @@ Optional<ValueIDNum> InstrRefBasedLDV::pickVPHILoc(
bool InstrRefBasedLDV::vlocJoin(
MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs,
- SmallPtrSet<const MachineBasicBlock *, 8> &InScopeBlocks,
SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore,
DbgValue &LiveIn) {
- // To emulate VarLocBasedImpl, process this block if it's not in scope but
- // _does_ assign a variable value. No live-ins for this scope are transferred
- // in though, so we can return immediately.
- if (InScopeBlocks.count(&MBB) == 0 && !ArtificialBlocks.count(&MBB))
- return false;
-
LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n");
bool Changed = false;
@@ -2466,11 +2489,10 @@ void InstrRefBasedLDV::buildVLocValueMap(const DILocation *DILoc,
// "blocks that are potentially in scope. See comment at start of vlocJoin.
SmallPtrSet<const MachineBasicBlock *, 8> InScopeBlocks = BlocksToExplore;
- // Old LiveDebugValues tracks variable locations that come out of blocks
- // not in scope, where DBG_VALUEs occur. This is something we could
- // legitimately ignore, but lets allow it for now.
- if (EmulateOldLDV)
- BlocksToExplore.insert(AssignBlocks.begin(), AssignBlocks.end());
+ // VarLoc LiveDebugValues tracks variable locations that are defined in
+ // blocks not in scope. This is something we could legitimately ignore, but
+  // let's allow it for now for the sake of coverage.
+ BlocksToExplore.insert(AssignBlocks.begin(), AssignBlocks.end());
// We also need to propagate variable values through any artificial blocks
// that immediately follow blocks in scope.
@@ -2635,7 +2657,7 @@ void InstrRefBasedLDV::buildVLocValueMap(const DILocation *DILoc,
// Join values from predecessors. Updates LiveInIdx, and writes output
// into JoinedInLocs.
bool InLocsChanged =
- vlocJoin(*MBB, LiveOutIdx, InScopeBlocks, BlocksToExplore, *LiveIn);
+ vlocJoin(*MBB, LiveOutIdx, BlocksToExplore, *LiveIn);
SmallVector<const MachineBasicBlock *, 8> Preds;
for (const auto *Pred : MBB->predecessors())
@@ -2730,6 +2752,8 @@ void InstrRefBasedLDV::buildVLocValueMap(const DILocation *DILoc,
continue;
if (BlockLiveIn->Kind == DbgValue::VPHI)
BlockLiveIn->Kind = DbgValue::Def;
+ assert(BlockLiveIn->Properties.DIExpr->getFragmentInfo() ==
+ Var.getFragment() && "Fragment info missing during value prop");
Output[MBB->getNumber()].push_back(std::make_pair(Var, *BlockLiveIn));
}
} // Per-variable loop.
@@ -2879,6 +2903,12 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
MFI = &MF.getFrameInfo();
LS.initialize(MF);
+ const auto &STI = MF.getSubtarget();
+ AdjustsStackInCalls = MFI->adjustsStack() &&
+ STI.getFrameLowering()->stackProbeFunctionModifiesSP();
+ if (AdjustsStackInCalls)
+ StackProbeSymbolName = STI.getTargetLowering()->getStackProbeSymbolName(MF);
+
MTracker =
new MLocTracker(MF, *TII, *TRI, *MF.getSubtarget().getTargetLowering());
VTracker = nullptr;
@@ -2895,7 +2925,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
++MaxNumBlocks;
MLocTransfer.resize(MaxNumBlocks);
- vlocs.resize(MaxNumBlocks);
+ vlocs.resize(MaxNumBlocks, VLocTracker(OverlapFragments, EmptyExpr));
SavedLiveIns.resize(MaxNumBlocks);
initialSetup(MF);
@@ -3040,6 +3070,8 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
BBNumToRPO.clear();
DebugInstrNumToInstr.clear();
DebugPHINumToValue.clear();
+ OverlapFragments.clear();
+ SeenFragments.clear();
return Changed;
}
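In emitLoc() above, a spilled location that was already indirect gets an extra DW_OP_deref appended to its DIExpression. The following is a sketch, not part of this patch, of what that append does in isolation; it assumes an LLVM build to compile and link against and only uses DIExpression::get/append and the dwarf constant seen in the hunk:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>

int main() {
  llvm::LLVMContext Ctx;
  // Start from an empty expression, standing in for whatever the variable
  // already carried before being spilled.
  const llvm::DIExpression *Expr =
      llvm::DIExpression::get(Ctx, llvm::ArrayRef<uint64_t>());

  // Mirror the "already indirect" branch of emitLoc(): appending DW_OP_deref
  // layers one more load on top of the stack-slot indirection.
  std::vector<uint64_t> Elts = {llvm::dwarf::DW_OP_deref};
  Expr = llvm::DIExpression::append(Expr, Elts);

  Expr->print(llvm::errs()); // prints something like !DIExpression(DW_OP_deref)
  llvm::errs() << "\n";
}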
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
index d96ef6d4f6e5..789205e61cdb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
@@ -655,6 +655,14 @@ public:
const DbgValueProperties &Properties);
};
+/// Types for recording sets of variable fragments that overlap. For a given
+/// local variable, we record all other fragments of that variable that could
+/// overlap it, to reduce search time.
+using FragmentOfVar =
+ std::pair<const DILocalVariable *, DIExpression::FragmentInfo>;
+using OverlapMap =
+ DenseMap<FragmentOfVar, SmallVector<DIExpression::FragmentInfo, 1>>;
+
/// Collection of DBG_VALUEs observed when traversing a block. Records each
/// variable and the value the DBG_VALUE refers to. Requires the machine value
/// location dataflow algorithm to have run already, so that values can be
@@ -672,9 +680,12 @@ public:
MapVector<DebugVariable, DbgValue> Vars;
DenseMap<DebugVariable, const DILocation *> Scopes;
MachineBasicBlock *MBB = nullptr;
+ const OverlapMap &OverlappingFragments;
+ DbgValueProperties EmptyProperties;
public:
- VLocTracker() {}
+ VLocTracker(const OverlapMap &O, const DIExpression *EmptyExpr)
+ : OverlappingFragments(O), EmptyProperties(EmptyExpr, false) {}
void defVar(const MachineInstr &MI, const DbgValueProperties &Properties,
Optional<ValueIDNum> ID) {
@@ -689,6 +700,8 @@ public:
if (!Result.second)
Result.first->second = Rec;
Scopes[Var] = MI.getDebugLoc().get();
+
+ considerOverlaps(Var, MI.getDebugLoc().get());
}
void defVar(const MachineInstr &MI, const MachineOperand &MO) {
@@ -704,16 +717,37 @@ public:
if (!Result.second)
Result.first->second = Rec;
Scopes[Var] = MI.getDebugLoc().get();
+
+ considerOverlaps(Var, MI.getDebugLoc().get());
}
-};
-/// Types for recording sets of variable fragments that overlap. For a given
-/// local variable, we record all other fragments of that variable that could
-/// overlap it, to reduce search time.
-using FragmentOfVar =
- std::pair<const DILocalVariable *, DIExpression::FragmentInfo>;
-using OverlapMap =
- DenseMap<FragmentOfVar, SmallVector<DIExpression::FragmentInfo, 1>>;
+ void considerOverlaps(const DebugVariable &Var, const DILocation *Loc) {
+ auto Overlaps = OverlappingFragments.find(
+ {Var.getVariable(), Var.getFragmentOrDefault()});
+ if (Overlaps == OverlappingFragments.end())
+ return;
+
+ // Otherwise: terminate any overlapped variable locations.
+ for (auto FragmentInfo : Overlaps->second) {
+ // The "empty" fragment is stored as DebugVariable::DefaultFragment, so
+      // that it overlaps with everything; however, its canonical representation
+ // in a DebugVariable is as "None".
+ Optional<DIExpression::FragmentInfo> OptFragmentInfo = FragmentInfo;
+ if (DebugVariable::isDefaultFragment(FragmentInfo))
+ OptFragmentInfo = None;
+
+ DebugVariable Overlapped(Var.getVariable(), OptFragmentInfo,
+ Var.getInlinedAt());
+ DbgValue Rec = DbgValue(EmptyProperties, DbgValue::Undef);
+
+ // Attempt insertion; overwrite if it's already mapped.
+ auto Result = Vars.insert(std::make_pair(Overlapped, Rec));
+ if (!Result.second)
+ Result.first->second = Rec;
+ Scopes[Overlapped] = Loc;
+ }
+ }
+};
// XXX XXX docs
class InstrRefBasedLDV : public LDVImpl {
@@ -817,6 +851,16 @@ private:
OverlapMap OverlapFragments;
VarToFragments SeenFragments;
+ /// True if we need to examine call instructions for stack clobbers. We
+ /// normally assume that they don't clobber SP, but stack probes on Windows
+ /// do.
+ bool AdjustsStackInCalls = false;
+
+ /// If AdjustsStackInCalls is true, this holds the name of the target's stack
+ /// probe function, which is the function we expect will alter the stack
+ /// pointer.
+ StringRef StackProbeSymbolName;
+
/// Tests whether this instruction is a spill to a stack slot.
bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF);
@@ -962,7 +1006,6 @@ private:
/// \returns true if any live-ins change value, either from value propagation
/// or PHI elimination.
bool vlocJoin(MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs,
- SmallPtrSet<const MachineBasicBlock *, 8> &InScopeBlocks,
SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore,
DbgValue &LiveIn);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
index dcd546f9c6db..5f976bf43c5b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -1875,34 +1875,57 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
LLVM_DEBUG(dbgs() << "********** EMITTING INSTR REFERENCES **********\n");
- // Re-insert any debug instrs back in the position they were. Ordering
- // is preserved by vector. We must re-insert in the same order to ensure that
- // debug instructions don't swap, which could re-order assignments.
- for (auto &P : StashedDebugInstrs) {
- SlotIndex Idx = P.Idx;
+ // Re-insert any debug instrs back in the position they were. We must
+ // re-insert in the same order to ensure that debug instructions don't swap,
+ // which could re-order assignments. Do so in a batch -- once we find the
+ // insert position, insert all instructions at the same SlotIdx. They are
+ // guaranteed to appear in-sequence in StashedDebugInstrs because we insert
+ // them in order.
+ for (auto StashIt = StashedDebugInstrs.begin();
+ StashIt != StashedDebugInstrs.end(); ++StashIt) {
+ SlotIndex Idx = StashIt->Idx;
+ MachineBasicBlock *MBB = StashIt->MBB;
+ MachineInstr *MI = StashIt->MI;
+
+ auto EmitInstsHere = [this, &StashIt, MBB, Idx,
+ MI](MachineBasicBlock::iterator InsertPos) {
+ // Insert this debug instruction.
+ MBB->insert(InsertPos, MI);
+
+ // Look at subsequent stashed debug instructions: if they're at the same
+ // index, insert those too.
+ auto NextItem = std::next(StashIt);
+ while (NextItem != StashedDebugInstrs.end() && NextItem->Idx == Idx) {
+        assert(NextItem->MBB == MBB && "Instrs with same slot index should be "
+ "in the same block");
+ MBB->insert(InsertPos, NextItem->MI);
+ StashIt = NextItem;
+ NextItem = std::next(StashIt);
+ };
+ };
// Start block index: find the first non-debug instr in the block, and
// insert before it.
- if (Idx == Slots->getMBBStartIdx(P.MBB)) {
+ if (Idx == Slots->getMBBStartIdx(MBB)) {
MachineBasicBlock::iterator InsertPos =
- findInsertLocation(P.MBB, Idx, *LIS, BBSkipInstsMap);
- P.MBB->insert(InsertPos, P.MI);
+ findInsertLocation(MBB, Idx, *LIS, BBSkipInstsMap);
+ EmitInstsHere(InsertPos);
continue;
}
if (MachineInstr *Pos = Slots->getInstructionFromIndex(Idx)) {
// Insert at the end of any debug instructions.
auto PostDebug = std::next(Pos->getIterator());
- PostDebug = skipDebugInstructionsForward(PostDebug, P.MBB->instr_end());
- P.MBB->insert(PostDebug, P.MI);
+ PostDebug = skipDebugInstructionsForward(PostDebug, MBB->instr_end());
+ EmitInstsHere(PostDebug);
} else {
// Insert position disappeared; walk forwards through slots until we
// find a new one.
- SlotIndex End = Slots->getMBBEndIdx(P.MBB);
+ SlotIndex End = Slots->getMBBEndIdx(MBB);
for (; Idx < End; Idx = Slots->getNextNonNullIndex(Idx)) {
Pos = Slots->getInstructionFromIndex(Idx);
if (Pos) {
- P.MBB->insert(Pos->getIterator(), P.MI);
+ EmitInstsHere(Pos->getIterator());
break;
}
}
@@ -1911,8 +1934,8 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
// insert! It's not safe to discard any debug instructions; place them
// in front of the first terminator, or in front of end().
if (Idx >= End) {
- auto TermIt = P.MBB->getFirstTerminator();
- P.MBB->insert(TermIt, P.MI);
+ auto TermIt = MBB->getFirstTerminator();
+ EmitInstsHere(TermIt);
}
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
index d91ff734ad8f..6380c4bfd6e6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -108,8 +108,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
SlotIndex UseIdx) const {
OrigIdx = OrigIdx.getRegSlot(true);
UseIdx = std::max(UseIdx, UseIdx.getRegSlot(true));
- for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = OrigMI->getOperand(i);
+ for (const MachineOperand &MO : OrigMI->operands()) {
if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
continue;
@@ -425,15 +424,8 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead,
// The new intervals would have to be spilled anyway so its not worth it.
// Also they currently aren't spilled so creating them and not spilling
// them results in incorrect code.
- bool BeingSpilled = false;
- for (unsigned i = 0, e = RegsBeingSpilled.size(); i != e; ++i) {
- if (VReg == RegsBeingSpilled[i]) {
- BeingSpilled = true;
- break;
- }
- }
-
- if (BeingSpilled) continue;
+ if (llvm::is_contained(RegsBeingSpilled, VReg))
+ continue;
// LI may have been separated, create new intervals.
LI->RenumberValues();
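The LiveRangeEdit.cpp hunk above collapses a hand-rolled "BeingSpilled" search loop into llvm::is_contained, which linearly scans a range for a value. A minimal sketch, using unsigned values in place of virtual registers and assuming only llvm/ADT/STLExtras.h:

#include "llvm/ADT/STLExtras.h"
#include <cstdio>
#include <vector>

int main() {
  std::vector<unsigned> RegsBeingSpilled = {5, 9, 42};

  for (unsigned VReg : {9u, 7u}) {
    if (llvm::is_contained(RegsBeingSpilled, VReg)) {
      std::printf("%u is being spilled, skipping\n", VReg);
      continue;               // mirrors the early "continue" in the hunk
    }
    std::printf("%u gets a new interval\n", VReg);
  }
}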
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeUtils.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeUtils.h
index dace05f1ad95..ada5c5be484a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeUtils.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeUtils.h
@@ -18,7 +18,7 @@
namespace llvm {
/// Helper function that distributes live range value numbers and the
-/// corresponding segments of a master live range \p LR to a list of newly
+/// corresponding segments of a primary live range \p LR to a list of newly
/// created live ranges \p SplitLRs. \p VNIClasses maps each value number in \p
/// LR to 0 meaning it should stay or to 1..N meaning it should go to a specific
/// live range in the \p SplitLRs array.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
index 51ba4b7e53eb..e8744797707b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
@@ -58,9 +58,9 @@ void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const {
MachineInstr *
LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const {
- for (unsigned i = 0, e = Kills.size(); i != e; ++i)
- if (Kills[i]->getParent() == MBB)
- return Kills[i];
+ for (MachineInstr *MI : Kills)
+ if (MI->getParent() == MBB)
+ return MI;
return nullptr;
}
@@ -811,8 +811,8 @@ bool LiveVariables::isLiveOut(Register Reg, const MachineBasicBlock &MBB) {
LiveVariables::VarInfo &VI = getVarInfo(Reg);
SmallPtrSet<const MachineBasicBlock *, 8> Kills;
- for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
- Kills.insert(VI.Kills[i]->getParent());
+ for (MachineInstr *MI : VI.Kills)
+ Kills.insert(MI->getParent());
// Loop over all of the successors of the basic block, checking to see if
// the value is either live in the block, or if it is killed in the block.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index 2e99c8595cbd..ee2387d1e8e6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -316,14 +316,14 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// than that, but the increased register pressure makes that a
// tricky thing to balance. Investigate if re-materializing these
// becomes an issue.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ for (const MachineOperand &MO : MI.operands()) {
// Consider replacing all frame index operands that reference
// an object allocated in the local block.
- if (MI.getOperand(i).isFI()) {
+ if (MO.isFI()) {
// Don't try this with values not in the local block.
- if (!MFI.isObjectPreAllocated(MI.getOperand(i).getIndex()))
+ if (!MFI.isObjectPreAllocated(MO.getIndex()))
break;
- int Idx = MI.getOperand(i).getIndex();
+ int Idx = MO.getIndex();
int64_t LocalOffset = LocalOffsets[Idx];
if (!TRI->needsFrameBaseReg(&MI, LocalOffset))
break;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp
index 90ecc6fc68fc..b742ad9823c9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp
@@ -314,6 +314,8 @@ bool MIRProfileLoaderPass::runOnMachineFunction(MachineFunction &MF) {
}
bool Changed = MIRSampleLoader->runOnFunction(MF);
+ if (Changed)
+ MBFI->calculate(MF, *MBFI->getMBPI(), *&getAnalysis<MachineLoopInfo>());
if (ViewBFIAfter && ViewBlockLayoutWithBFI != GVDT_None &&
(ViewBlockFreqFuncName.empty() ||
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
index 366d06871245..310c2721c3bd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
@@ -1170,9 +1170,10 @@ auto MachineFunction::salvageCopySSA(MachineInstr &MI)
void MachineFunction::finalizeDebugInstrRefs() {
auto *TII = getSubtarget().getInstrInfo();
- auto MakeDbgValue = [&](MachineInstr &MI) {
+ auto MakeUndefDbgValue = [&](MachineInstr &MI) {
const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_VALUE);
MI.setDesc(RefII);
+ MI.getOperand(0).setReg(0);
MI.getOperand(1).ChangeToRegister(0, false);
};
@@ -1187,15 +1188,15 @@ void MachineFunction::finalizeDebugInstrRefs() {
Register Reg = MI.getOperand(0).getReg();
// Some vregs can be deleted as redundant in the meantime. Mark those
- // as DBG_VALUE $noreg.
- if (Reg == 0) {
- MakeDbgValue(MI);
+ // as DBG_VALUE $noreg. Additionally, some normal instructions are
+ // quickly deleted, leaving dangling references to vregs with no def.
+ if (Reg == 0 || !RegInfo->hasOneDef(Reg)) {
+ MakeUndefDbgValue(MI);
continue;
}
assert(Reg.isVirtual());
MachineInstr &DefMI = *RegInfo->def_instr_begin(Reg);
- assert(RegInfo->hasOneDef(Reg));
// If we've found a copy-like instruction, follow it back to the
// instruction that defines the source value, see salvageCopySSA docs
@@ -1327,9 +1328,9 @@ bool MachineJumpTableInfo::ReplaceMBBInJumpTable(unsigned Idx,
assert(Old != New && "Not making a change?");
bool MadeChange = false;
MachineJumpTableEntry &JTE = JumpTables[Idx];
- for (size_t j = 0, e = JTE.MBBs.size(); j != e; ++j)
- if (JTE.MBBs[j] == Old) {
- JTE.MBBs[j] = New;
+ for (MachineBasicBlock *&MBB : JTE.MBBs)
+ if (MBB == Old) {
+ MBB = New;
MadeChange = true;
}
return MadeChange;
@@ -1342,8 +1343,8 @@ void MachineJumpTableInfo::print(raw_ostream &OS) const {
for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) {
OS << printJumpTableEntryReference(i) << ':';
- for (unsigned j = 0, f = JumpTables[i].MBBs.size(); j != f; ++j)
- OS << ' ' << printMBBReference(*JumpTables[i].MBBs[j]);
+ for (const MachineBasicBlock *MBB : JumpTables[i].MBBs)
+ OS << ' ' << printMBBReference(*MBB);
if (i != e)
OS << '\n';
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
index 5c4f75e9ceb9..aaa80432d2f2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
@@ -1490,12 +1490,10 @@ bool MachineInstr::allDefsAreDead() const {
/// instruction to this instruction.
void MachineInstr::copyImplicitOps(MachineFunction &MF,
const MachineInstr &MI) {
- for (unsigned i = MI.getDesc().getNumOperands(), e = MI.getNumOperands();
- i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO :
+ llvm::drop_begin(MI.operands(), MI.getDesc().getNumOperands()))
if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask())
addOperand(MF, MO);
- }
}
bool MachineInstr::hasComplexRegisterTies() const {
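copyImplicitOps() above uses the two-argument form of llvm::drop_begin to skip the instruction's explicit operands and visit only the implicit tail. A sketch of that form over a plain vector, where NumExplicitOps is a made-up stand-in for MI.getDesc().getNumOperands():

#include "llvm/ADT/STLExtras.h"
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> Operands = {1, 2, 3, 4, 5};
  const size_t NumExplicitOps = 3; // hypothetical explicit-operand count

  // Visit only the tail past the explicit operands (here: 4 and 5).
  for (int Op : llvm::drop_begin(Operands, NumExplicitOps))
    std::printf("trailing operand %d\n", Op);
}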
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
index 4d080e1a4f82..680dbe54ffaf 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
@@ -1071,7 +1071,9 @@ void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) {
// The Value and Offset may differ due to CSE. But the flags and size
// should be the same.
assert(MMO->getFlags() == getFlags() && "Flags mismatch!");
- assert(MMO->getSize() == getSize() && "Size mismatch!");
+ assert((MMO->getSize() == ~UINT64_C(0) || getSize() == ~UINT64_C(0) ||
+ MMO->getSize() == getSize()) &&
+ "Size mismatch!");
if (MMO->getBaseAlign() >= getBaseAlign()) {
// Update the alignment value.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
index cfbccebaff3e..7783b5e0d3cc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -617,20 +617,11 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
F->addFnAttr(Attribute::OptimizeForSize);
F->addFnAttr(Attribute::MinSize);
- // Include target features from an arbitrary candidate for the outlined
- // function. This makes sure the outlined function knows what kinds of
- // instructions are going into it. This is fine, since all parent functions
- // must necessarily support the instructions that are in the outlined region.
Candidate &FirstCand = OF.Candidates.front();
- const Function &ParentFn = FirstCand.getMF()->getFunction();
- if (ParentFn.hasFnAttribute("target-features"))
- F->addFnAttr(ParentFn.getFnAttribute("target-features"));
+ const TargetInstrInfo &TII =
+ *FirstCand.getMF()->getSubtarget().getInstrInfo();
- // Set nounwind, so we don't generate eh_frame.
- if (llvm::all_of(OF.Candidates, [](const outliner::Candidate &C) {
- return C.getMF()->getFunction().hasFnAttribute(Attribute::NoUnwind);
- }))
- F->addFnAttr(Attribute::NoUnwind);
+ TII.mergeOutliningCandidateAttributes(*F, OF.Candidates);
BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
IRBuilder<> Builder(EntryBB);
@@ -639,8 +630,6 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock();
- const TargetSubtargetInfo &STI = MF.getSubtarget();
- const TargetInstrInfo &TII = *STI.getInstrInfo();
// Insert the new function into the module.
MF.insert(MF.begin(), &MBB);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
index e18318386def..8d6459a627fa 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -1455,17 +1455,15 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
int asap = 0;
int zeroLatencyDepth = 0;
SUnit *SU = &SUnits[I];
- for (SUnit::const_pred_iterator IP = SU->Preds.begin(),
- EP = SU->Preds.end();
- IP != EP; ++IP) {
- SUnit *pred = IP->getSUnit();
- if (IP->getLatency() == 0)
+ for (const SDep &P : SU->Preds) {
+ SUnit *pred = P.getSUnit();
+ if (P.getLatency() == 0)
zeroLatencyDepth =
std::max(zeroLatencyDepth, getZeroLatencyDepth(pred) + 1);
- if (ignoreDependence(*IP, true))
+ if (ignoreDependence(P, true))
continue;
- asap = std::max(asap, (int)(getASAP(pred) + IP->getLatency() -
- getDistance(pred, SU, *IP) * MII));
+ asap = std::max(asap, (int)(getASAP(pred) + P.getLatency() -
+ getDistance(pred, SU, P) * MII));
}
maxASAP = std::max(maxASAP, asap);
ScheduleInfo[I].ASAP = asap;
@@ -1521,9 +1519,8 @@ static bool pred_L(SetVector<SUnit *> &NodeOrder,
SmallSetVector<SUnit *, 8> &Preds,
const NodeSet *S = nullptr) {
Preds.clear();
- for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
- I != E; ++I) {
- for (const SDep &Pred : (*I)->Preds) {
+ for (const SUnit *SU : NodeOrder) {
+ for (const SDep &Pred : SU->Preds) {
if (S && S->count(Pred.getSUnit()) == 0)
continue;
if (ignoreDependence(Pred, true))
@@ -1532,7 +1529,7 @@ static bool pred_L(SetVector<SUnit *> &NodeOrder,
Preds.insert(Pred.getSUnit());
}
// Back-edges are predecessors with an anti-dependence.
- for (const SDep &Succ : (*I)->Succs) {
+ for (const SDep &Succ : SU->Succs) {
if (Succ.getKind() != SDep::Anti)
continue;
if (S && S->count(Succ.getSUnit()) == 0)
@@ -2546,8 +2543,7 @@ void SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
unsigned Pos = 0;
for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
++I, ++Pos) {
- for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
continue;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
index 30745c7a5583..54c478645dcf 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
@@ -596,8 +596,7 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
// MI is cheap, we probably don't want to break the critical edge for it.
// However, if this would allow some definitions of its source operands
// to be sunk then it's probably worth it.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isUse())
continue;
Register Reg = MO.getReg();
@@ -789,8 +788,7 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
// If this instruction is inside a loop and sinking this instruction can make
  // more registers live range shorten, it is still profitable.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
// Ignore non-register operands.
if (!MO.isReg())
continue;
@@ -889,8 +887,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
// SuccToSinkTo - This is the successor to sink this instruction to, once we
// decide.
MachineBasicBlock *SuccToSinkTo = nullptr;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg()) continue; // Ignore non-register operands.
Register Reg = MO.getReg();
@@ -1322,8 +1319,7 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
// If the instruction to move defines a dead physical register which is live
// when leaving the basic block, don't move it because it could turn into a
// "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
- for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
- const MachineOperand &MO = MI.getOperand(I);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || MO.isUse())
continue;
Register Reg = MO.getReg();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
index d6bb3e7c9e58..32078db76cf3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -1276,11 +1276,9 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
if (DstTy.getNumElements() != MI->getNumOperands() - 1)
report("G_BUILD_VECTOR must have an operand for each elemement", MI);
- for (unsigned i = 2; i < MI->getNumOperands(); ++i) {
- if (MRI->getType(MI->getOperand(1).getReg()) !=
- MRI->getType(MI->getOperand(i).getReg()))
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
+ if (MRI->getType(MI->getOperand(1).getReg()) != MRI->getType(MO.getReg()))
report("G_BUILD_VECTOR source operand types are not homogeneous", MI);
- }
break;
}
@@ -1292,12 +1290,10 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
if (!DstTy.isVector() || SrcEltTy.isVector())
report("G_BUILD_VECTOR_TRUNC must produce a vector from scalar operands",
MI);
- for (unsigned i = 2; i < MI->getNumOperands(); ++i) {
- if (MRI->getType(MI->getOperand(1).getReg()) !=
- MRI->getType(MI->getOperand(i).getReg()))
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
+ if (MRI->getType(MI->getOperand(1).getReg()) != MRI->getType(MO.getReg()))
report("G_BUILD_VECTOR_TRUNC source operand types are not homogeneous",
MI);
- }
if (SrcEltTy.getSizeInBits() <= DstTy.getElementType().getSizeInBits())
report("G_BUILD_VECTOR_TRUNC source operand types are not larger than "
"dest elt type",
@@ -1316,11 +1312,9 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
if (MI->getNumOperands() < 3)
report("G_CONCAT_VECTOR requires at least 2 source operands", MI);
- for (unsigned i = 2; i < MI->getNumOperands(); ++i) {
- if (MRI->getType(MI->getOperand(1).getReg()) !=
- MRI->getType(MI->getOperand(i).getReg()))
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
+ if (MRI->getType(MI->getOperand(1).getReg()) != MRI->getType(MO.getReg()))
report("G_CONCAT_VECTOR source operand types are not homogeneous", MI);
- }
if (DstTy.getNumElements() !=
SrcTy.getNumElements() * (MI->getNumOperands() - 1))
report("G_CONCAT_VECTOR num dest and source elements should match", MI);
@@ -3063,9 +3057,9 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
SlotIndex PEnd = LiveInts->getMBBEndIdx(Pred);
// Predecessor of landing pad live-out on last call.
if (MFI->isEHPad()) {
- for (auto I = Pred->rbegin(), E = Pred->rend(); I != E; ++I) {
- if (I->isCall()) {
- PEnd = Indexes->getInstructionIndex(*I).getBoundaryIndex();
+ for (const MachineInstr &MI : llvm::reverse(*Pred)) {
+ if (MI.isCall()) {
+ PEnd = Indexes->getInstructionIndex(MI).getBoundaryIndex();
break;
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
index 8b3cdfab4d42..aaa6403cc978 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -73,8 +73,7 @@ void ModuloScheduleExpander::expand() {
// stage difference for each use. Keep the maximum value.
for (MachineInstr *MI : Schedule.getInstructions()) {
int DefStage = Schedule.getStage(MI);
- for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
- MachineOperand &Op = MI->getOperand(i);
+ for (const MachineOperand &Op : MI->operands()) {
if (!Op.isReg() || !Op.isDef())
continue;
@@ -1006,8 +1005,7 @@ void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
unsigned CurStageNum,
unsigned InstrStageNum,
ValueMapTy *VRMap) {
- for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = NewMI->getOperand(i);
+ for (MachineOperand &MO : NewMI->operands()) {
if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
continue;
Register reg = MO.getReg();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 9a4f70a6070f..29a88480fd9f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -527,9 +527,9 @@ static void updateLiveness(MachineFunction &MF) {
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ for (const CalleeSavedInfo &I : CSI) {
for (MachineBasicBlock *MBB : Visited) {
- MCPhysReg Reg = CSI[i].getReg();
+ MCPhysReg Reg = I.getReg();
// Add the callee-saved register as live-in.
// It's killed at the spill.
if (!MRI.isReserved(Reg) && !MBB->isLiveIn(Reg))
@@ -540,17 +540,16 @@ static void updateLiveness(MachineFunction &MF) {
// each MBB between the prologue and epilogue so that it is not clobbered
// before it is reloaded in the epilogue. The Visited set contains all
// blocks outside of the region delimited by prologue/epilogue.
- if (CSI[i].isSpilledToReg()) {
+ if (I.isSpilledToReg()) {
for (MachineBasicBlock &MBB : MF) {
if (Visited.count(&MBB))
continue;
- MCPhysReg DstReg = CSI[i].getDstReg();
+ MCPhysReg DstReg = I.getDstReg();
if (!MBB.isLiveIn(DstReg))
MBB.addLiveIn(DstReg);
}
}
}
-
}
/// Insert restore code for the callee-saved registers used in the function.
@@ -902,9 +901,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
// incoming stack pointer if a frame pointer is required and is closer
// to the incoming rather than the final stack pointer.
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
- bool EarlyScavengingSlots = (TFI.hasFP(MF) && TFI.isFPCloseToIncomingSP() &&
- RegInfo->useFPForScavengingIndex(MF) &&
- !RegInfo->hasStackRealignment(MF));
+ bool EarlyScavengingSlots = TFI.allocateScavengingFrameIndexesNearIncomingSP(MF);
if (RS && EarlyScavengingSlots) {
SmallVector<int, 2> SFIs;
RS->getScavengingFrameIndices(SFIs);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
index 68920e2e50df..6653145d3d2a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -1258,8 +1258,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// Free registers occupied by defs.
// Iterate operands in reverse order, so we see the implicit super register
// defs first (we added them earlier in case of <def,read-undef>).
- for (unsigned I = MI.getNumOperands(); I-- > 0;) {
- MachineOperand &MO = MI.getOperand(I);
+ for (MachineOperand &MO : llvm::reverse(MI.operands())) {
if (!MO.isReg() || !MO.isDef())
continue;
@@ -1362,8 +1361,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// Free early clobbers.
if (HasEarlyClobber) {
- for (unsigned I = MI.getNumOperands(); I-- > 0; ) {
- MachineOperand &MO = MI.getOperand(I);
+ for (MachineOperand &MO : llvm::reverse(MI.operands())) {
if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber())
continue;
// subreg defs don't free the full register. We left the subreg number
@@ -1440,8 +1438,7 @@ void RegAllocFast::handleBundle(MachineInstr &MI) {
MachineBasicBlock::instr_iterator BundledMI = MI.getIterator();
++BundledMI;
while (BundledMI->isBundledWithPred()) {
- for (unsigned I = 0; I < BundledMI->getNumOperands(); ++I) {
- MachineOperand &MO = BundledMI->getOperand(I);
+ for (MachineOperand &MO : BundledMI->operands()) {
if (!MO.isReg())
continue;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 5a93b58e0baf..50411c177007 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -199,7 +199,8 @@ class RAGreedy : public MachineFunctionPass,
struct RegInfo {
LiveRangeStage Stage = RS_New;
- // Cascade - Eviction loop prevention. See canEvictInterference().
+ // Cascade - Eviction loop prevention. See
+ // canEvictInterferenceBasedOnCost().
unsigned Cascade = 0;
RegInfo() = default;
@@ -207,13 +208,51 @@ class RAGreedy : public MachineFunctionPass,
IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo;
+ LiveRangeStage getStage(Register Reg) const {
+ return ExtraRegInfo[Reg].Stage;
+ }
+
LiveRangeStage getStage(const LiveInterval &VirtReg) const {
- return ExtraRegInfo[VirtReg.reg()].Stage;
+ return getStage(VirtReg.reg());
+ }
+
+ void setStage(Register Reg, LiveRangeStage Stage) {
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+ ExtraRegInfo[Reg].Stage = Stage;
}
void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) {
+ setStage(VirtReg.reg(), Stage);
+ }
+
+ /// Return the current stage of the register, if present, otherwise initialize
+ /// it and return that.
+ LiveRangeStage getOrInitStage(Register Reg) {
+ ExtraRegInfo.grow(Reg);
+ return getStage(Reg);
+ }
+
+ unsigned getCascade(Register Reg) const { return ExtraRegInfo[Reg].Cascade; }
+
+ void setCascade(Register Reg, unsigned Cascade) {
ExtraRegInfo.resize(MRI->getNumVirtRegs());
- ExtraRegInfo[VirtReg.reg()].Stage = Stage;
+ ExtraRegInfo[Reg].Cascade = Cascade;
+ }
+
+ unsigned getOrAssignNewCascade(Register Reg) {
+ unsigned Cascade = getCascade(Reg);
+ if (!Cascade) {
+ Cascade = NextCascade++;
+ setCascade(Reg, Cascade);
+ }
+ return Cascade;
+ }
+
+ unsigned getCascadeOrCurrentNext(Register Reg) const {
+ unsigned Cascade = getCascade(Reg);
+ if (!Cascade)
+ Cascade = NextCascade;
+ return Cascade;
}
template<typename Iterator>
@@ -410,8 +449,11 @@ private:
void calcGapWeights(MCRegister, SmallVectorImpl<float> &);
Register canReassign(LiveInterval &VirtReg, Register PrevReg) const;
bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool) const;
- bool canEvictInterference(LiveInterval &, MCRegister, bool, EvictionCost &,
- const SmallVirtRegSet &) const;
+ bool canEvictInterferenceBasedOnCost(LiveInterval &, MCRegister, bool,
+ EvictionCost &,
+ const SmallVirtRegSet &) const;
+ bool canEvictHintInterference(LiveInterval &, MCRegister,
+ const SmallVirtRegSet &) const;
bool canEvictInterferenceInRange(const LiveInterval &VirtReg,
MCRegister PhysReg, SlotIndex Start,
SlotIndex End, EvictionCost &MaxCost) const;
@@ -683,15 +725,16 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
assert(Reg.isVirtual() && "Can only enqueue virtual registers");
unsigned Prio;
- ExtraRegInfo.grow(Reg);
- if (ExtraRegInfo[Reg].Stage == RS_New)
- ExtraRegInfo[Reg].Stage = RS_Assign;
-
- if (ExtraRegInfo[Reg].Stage == RS_Split) {
+ auto Stage = getOrInitStage(Reg);
+ if (Stage == RS_New) {
+ Stage = RS_Assign;
+ setStage(Reg, Stage);
+ }
+ if (Stage == RS_Split) {
// Unsplit ranges that couldn't be allocated immediately are deferred until
// everything else has been allocated.
Prio = Size;
- } else if (ExtraRegInfo[Reg].Stage == RS_Memory) {
+ } else if (Stage == RS_Memory) {
// Memory operand should be considered last.
// Change the priority such that Memory operand are assigned in
// the reverse order that they came in.
@@ -706,7 +749,7 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
bool ForceGlobal = !ReverseLocal &&
(Size / SlotIndex::InstrDist) > (2 * RCI.getNumAllocatableRegs(&RC));
- if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
+ if (Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
LIS->intervalIsInOneMBB(*LI)) {
// Allocate original local ranges in linear instruction order. Since they
// are singly defined, this produces optimal coloring in the absence of
@@ -780,10 +823,8 @@ MCRegister RAGreedy::tryAssign(LiveInterval &VirtReg,
if (Order.isHint(Hint)) {
MCRegister PhysHint = Hint.asMCReg();
LLVM_DEBUG(dbgs() << "missed hint " << printReg(PhysHint, TRI) << '\n');
- EvictionCost MaxCost;
- MaxCost.setBrokenHints(1);
- if (canEvictInterference(VirtReg, PhysHint, true, MaxCost,
- FixedRegisters)) {
+
+ if (canEvictHintInterference(VirtReg, PhysHint, FixedRegisters)) {
evictInterference(VirtReg, PhysHint, NewVRegs);
return PhysHint;
}
@@ -864,8 +905,19 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
return false;
}
-/// canEvictInterference - Return true if all interferences between VirtReg and
-/// PhysReg can be evicted.
+/// canEvictHintInterference - return true if the interference for VirtReg
+/// on the PhysReg, which is VirtReg's hint, can be evicted in favor of VirtReg.
+bool RAGreedy::canEvictHintInterference(
+ LiveInterval &VirtReg, MCRegister PhysReg,
+ const SmallVirtRegSet &FixedRegisters) const {
+ EvictionCost MaxCost;
+ MaxCost.setBrokenHints(1);
+ return canEvictInterferenceBasedOnCost(VirtReg, PhysReg, true, MaxCost,
+ FixedRegisters);
+}
+
+/// canEvictInterferenceBasedOnCost - Return true if all interferences between
+/// VirtReg and PhysReg can be evicted.
///
/// @param VirtReg Live range that is about to be assigned.
/// @param PhysReg Desired register for assignment.
@@ -873,7 +925,7 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
/// @param MaxCost Only look for cheaper candidates and update with new cost
/// when returning true.
/// @returns True when interference can be evicted cheaper than MaxCost.
-bool RAGreedy::canEvictInterference(
+bool RAGreedy::canEvictInterferenceBasedOnCost(
LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
EvictionCost &MaxCost, const SmallVirtRegSet &FixedRegisters) const {
// It is only possible to evict virtual register interference.
@@ -1054,9 +1106,7 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg,
// Make sure that VirtReg has a cascade number, and assign that cascade
  // number to every evicted register. These live ranges can then only be
// evicted by a newer cascade, preventing infinite loops.
- unsigned Cascade = ExtraRegInfo[VirtReg.reg()].Cascade;
- if (!Cascade)
- Cascade = ExtraRegInfo[VirtReg.reg()].Cascade = NextCascade++;
+ unsigned Cascade = getOrAssignNewCascade(VirtReg.reg());
LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI)
<< " interference: Cascade " << Cascade << '\n');
@@ -1082,10 +1132,10 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg,
LastEvicted.addEviction(PhysReg, VirtReg.reg(), Intf->reg());
Matrix->unassign(*Intf);
- assert((ExtraRegInfo[Intf->reg()].Cascade < Cascade ||
+ assert((getCascade(Intf->reg()) < Cascade ||
VirtReg.isSpillable() < Intf->isSpillable()) &&
"Cannot decrease cascade number, illegal eviction");
- ExtraRegInfo[Intf->reg()].Cascade = Cascade;
+ setCascade(Intf->reg(), Cascade);
++NumEvicted;
NewVRegs.push_back(Intf->reg());
}
@@ -1150,8 +1200,8 @@ MCRegister RAGreedy::tryFindEvictionCandidate(
continue;
}
- if (!canEvictInterference(VirtReg, PhysReg, false, BestCost,
- FixedRegisters))
+ if (!canEvictInterferenceBasedOnCost(VirtReg, PhysReg, false, BestCost,
+ FixedRegisters))
continue;
// Best so far.
@@ -1756,7 +1806,6 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
SE->finish(&IntvMap);
DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);
- ExtraRegInfo.resize(MRI->getNumVirtRegs());
unsigned OrigBlocks = SA->getNumLiveBlocks();
// Sort out the new intervals created by splitting. We get four kinds:
@@ -1765,10 +1814,10 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
// - Block-local splits are candidates for local splitting.
// - DCE leftovers should go back on the queue.
for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
- LiveInterval &Reg = LIS->getInterval(LREdit.get(I));
+ const LiveInterval &Reg = LIS->getInterval(LREdit.get(I));
// Ignore old intervals from DCE.
- if (getStage(Reg) != RS_New)
+ if (getOrInitStage(Reg.reg()) != RS_New)
continue;
// Remainder interval. Don't try splitting again, spill if it doesn't
@@ -2012,13 +2061,11 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// Tell LiveDebugVariables about the new ranges.
DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);
- ExtraRegInfo.resize(MRI->getNumVirtRegs());
-
// Sort out the new intervals created by splitting. The remainder interval
// goes straight to spilling, the new local ranges get to stay RS_New.
for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
- LiveInterval &LI = LIS->getInterval(LREdit.get(I));
- if (getStage(LI) == RS_New && IntvMap[I] == 0)
+ const LiveInterval &LI = LIS->getInterval(LREdit.get(I));
+ if (getOrInitStage(LI.reg()) == RS_New && IntvMap[I] == 0)
setStage(LI, RS_Spill);
}
@@ -2104,8 +2151,6 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SmallVector<unsigned, 8> IntvMap;
SE->finish(&IntvMap);
DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
- ExtraRegInfo.resize(MRI->getNumVirtRegs());
-
// Assign all new registers to RS_Spill. This was the last chance.
setStage(LREdit.begin(), LREdit.end(), RS_Spill);
return 0;
@@ -2400,7 +2445,6 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SmallVector<unsigned, 8> IntvMap;
SE->finish(&IntvMap);
DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
-
// If the new range has the same number of instructions as before, mark it as
// RS_Split2 so the next split will be forced to make progress. Otherwise,
// leave the new intervals as RS_New so they can compete.
@@ -3021,7 +3065,7 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
LiveRangeStage Stage = getStage(VirtReg);
LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade "
- << ExtraRegInfo[VirtReg.reg()].Cascade << '\n');
+ << getCascade(VirtReg.reg()) << '\n');
// Try to evict a less worthy live range, but only for ranges from the primary
// queue. The RS_Split ranges already failed to do this, and they should not
@@ -3311,7 +3355,6 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI, *VRAI));
ExtraRegInfo.clear();
- ExtraRegInfo.resize(MRI->getNumVirtRegs());
NextCascade = 1;
IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
GlobalCand.resize(32); // This will grow as needed.
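The RegAllocGreedy.cpp changes above move the per-vreg Cascade field behind accessors such as getOrAssignNewCascade(), which hands out a fresh cascade number the first time a register needs one and reuses it afterwards. A sketch of that lazy-ID pattern, with a DenseMap standing in for the IndexedMap of RegInfo; CascadeTable is a made-up name for illustration:

#include "llvm/ADT/DenseMap.h"
#include <cstdio>

struct CascadeTable {
  llvm::DenseMap<unsigned, unsigned> Cascade;
  unsigned NextCascade = 1;

  unsigned getOrAssignNewCascade(unsigned Reg) {
    unsigned &C = Cascade[Reg]; // default-constructs to 0 on first access
    if (!C)
      C = NextCascade++;        // assign a fresh number exactly once
    return C;
  }
};

int main() {
  CascadeTable T;
  unsigned C7a = T.getOrAssignNewCascade(7); // 1, freshly assigned
  unsigned C9  = T.getOrAssignNewCascade(9); // 2
  unsigned C7b = T.getOrAssignNewCascade(7); // 1 again, reused not bumped
  std::printf("%u %u %u\n", C7a, C9, C7b);
}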
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
index c847068bca90..4c8534cf2d01 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -3908,20 +3908,20 @@ void RegisterCoalescer::lateLiveIntervalUpdate() {
bool RegisterCoalescer::
copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
bool Progress = false;
- for (unsigned i = 0, e = CurrList.size(); i != e; ++i) {
- if (!CurrList[i])
+ for (MachineInstr *&MI : CurrList) {
+ if (!MI)
continue;
// Skip instruction pointers that have already been erased, for example by
// dead code elimination.
- if (ErasedInstrs.count(CurrList[i])) {
- CurrList[i] = nullptr;
+ if (ErasedInstrs.count(MI)) {
+ MI = nullptr;
continue;
}
bool Again = false;
- bool Success = joinCopy(CurrList[i], Again);
+ bool Success = joinCopy(MI, Again);
Progress |= Success;
if (Success || !Again)
- CurrList[i] = nullptr;
+ MI = nullptr;
}
return Progress;
}
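copyCoalesceWorkList() above iterates the work list by mutable reference (MachineInstr *&MI) so entries can be cleared in place without index bookkeeping. A plain-C++ sketch of the same idea, with ints standing in for instructions:

#include <cstdio>
#include <vector>

int main() {
  int A = 1, B = 2, C = 3;
  std::vector<int *> CurrList = {&A, nullptr, &B, &C};

  for (int *&MI : CurrList) {   // reference to the pointer stored in the list
    if (!MI)
      continue;                 // slot already cleared
    if (*MI == 2) {             // stand-in for the "already erased" check
      MI = nullptr;             // clears the slot in the underlying vector
      continue;
    }
    std::printf("processing %d\n", *MI);
    MI = nullptr;               // mirrors clearing on "Success || !Again"
  }
}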
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 3f013eb6024e..0e8e8338b46d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -406,11 +406,10 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
// register in later operands. The lanes of other defs will now be live
// after this instruction, so these should not be treated as killed by the
// instruction even though they appear to be killed in this one operand.
- for (int I = OperIdx + 1, E = MI->getNumOperands(); I != E; ++I) {
- const MachineOperand &OtherMO = MI->getOperand(I);
+ for (const MachineOperand &OtherMO :
+ llvm::drop_begin(MI->operands(), OperIdx + 1))
if (OtherMO.isReg() && OtherMO.isDef() && OtherMO.getReg() == Reg)
KillLaneMask &= ~getLaneMaskForMO(OtherMO);
- }
}
// Clear undef flag, we'll re-add it later once we know which subregister
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ce400ea43f29..df5a041b87cd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4436,7 +4436,7 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
SDValue OptimizedDiv =
isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
- if (OptimizedDiv.getNode()) {
+ if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
// If the equivalent Div node also exists, update its users.
unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
@@ -4464,6 +4464,9 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
SDLoc DL(N);
if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
// fold (mulhs x, 0) -> 0
// do not return N0/N1, because undef node may exist.
if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
@@ -4521,6 +4524,9 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
SDLoc DL(N);
if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
// fold (mulhu x, 0) -> 0
// do not return N0/N1, because undef node may exist.
if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
@@ -4779,6 +4785,106 @@ SDValue DAGCombiner::visitMULO(SDNode *N) {
return SDValue();
}
+// Function to calculate whether the Min/Max pair of SDNodes (potentially
+// swapped around) make a signed saturate pattern, clamping to between -2^(BW-1)
+// and 2^(BW-1)-1. Returns the node being clamped and the bitwidth of the clamp
+// in BW. Should work with both SMIN/SMAX nodes and setcc/select combo. The
+// operands are the same as SimplifySelectCC. N0<N1 ? N2 : N3
+static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
+ SDValue N3, ISD::CondCode CC, unsigned &BW) {
+ auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
+ ISD::CondCode CC) {
+ // The compare and select operand should be the same or the select operands
+ // should be truncated versions of the comparison.
+ if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
+ return 0;
+ // The constants need to be the same or a truncated version of each other.
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+ ConstantSDNode *N3C = isConstOrConstSplat(N3);
+ if (!N1C || !N3C)
+ return 0;
+ const APInt &C1 = N1C->getAPIntValue();
+ const APInt &C2 = N3C->getAPIntValue();
+ if (C1.getBitWidth() < C2.getBitWidth() ||
+ C1 != C2.sextOrSelf(C1.getBitWidth()))
+ return 0;
+ return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
+ };
+
+ // Check the initial value is a SMIN/SMAX equivalent.
+ unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
+ if (!Opcode0)
+ return SDValue();
+
+ SDValue N00, N01, N02, N03;
+ ISD::CondCode N0CC;
+ switch (N0.getOpcode()) {
+ case ISD::SMIN:
+ case ISD::SMAX:
+ N00 = N02 = N0.getOperand(0);
+ N01 = N03 = N0.getOperand(1);
+ N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
+ break;
+ case ISD::SELECT_CC:
+ N00 = N0.getOperand(0);
+ N01 = N0.getOperand(1);
+ N02 = N0.getOperand(2);
+ N03 = N0.getOperand(3);
+ N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
+ break;
+ case ISD::SELECT:
+ case ISD::VSELECT:
+ if (N0.getOperand(0).getOpcode() != ISD::SETCC)
+ return SDValue();
+ N00 = N0.getOperand(0).getOperand(0);
+ N01 = N0.getOperand(0).getOperand(1);
+ N02 = N0.getOperand(1);
+ N03 = N0.getOperand(2);
+ N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
+ break;
+ default:
+ return SDValue();
+ }
+
+ unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
+ if (!Opcode1 || Opcode0 == Opcode1)
+ return SDValue();
+
+ ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
+ ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
+ if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
+ return SDValue();
+
+ const APInt &MinC = MinCOp->getAPIntValue();
+ const APInt &MaxC = MaxCOp->getAPIntValue();
+ APInt MinCPlus1 = MinC + 1;
+ if (-MaxC != MinCPlus1 || !MinCPlus1.isPowerOf2())
+ return SDValue();
+ BW = MinCPlus1.exactLogBase2() + 1;
+ return N02;
+}
+
+static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
+ SDValue N3, ISD::CondCode CC,
+ SelectionDAG &DAG) {
+ unsigned BW;
+ SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW);
+ if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
+ return SDValue();
+ EVT FPVT = Fp.getOperand(0).getValueType();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
+ if (FPVT.isVector())
+ NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
+ FPVT.getVectorElementCount());
+ if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(
+ ISD::FP_TO_SINT_SAT, Fp.getOperand(0).getValueType(), NewVT))
+ return SDValue();
+ SDLoc DL(Fp);
+ SDValue Sat = DAG.getNode(ISD::FP_TO_SINT_SAT, DL, NewVT, Fp.getOperand(0),
+ DAG.getValueType(NewVT.getScalarType()));
+ return DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0));
+}
+
SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
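
As a standalone illustration of the shape the new isSaturatingMinMax() helper looks for (plain C++, not LLVM code; the 8-bit clamp constants below are made-up examples): an smin/smax pair clamping to [-2^(BW-1), 2^(BW-1)-1], with BW recovered from the two constants exactly the way the MinCPlus1 power-of-two check above does it.

// Minimal sketch, assuming nothing beyond the standard library and the
// GCC/Clang __builtin_ctzll intrinsic.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  const int64_t MinC = 127;   // constant on the SMIN side: 2^(BW-1) - 1
  const int64_t MaxC = -128;  // constant on the SMAX side: -2^(BW-1)

  // The combine requires MinC + 1 to be a power of two equal to -MaxC.
  const int64_t MinCPlus1 = MinC + 1;
  assert(MinCPlus1 == -MaxC && (MinCPlus1 & (MinCPlus1 - 1)) == 0);
  const unsigned BW = __builtin_ctzll(MinCPlus1) + 1; // exactLogBase2() + 1
  std::printf("clamp width BW = %u\n", BW);           // prints 8

  // smin(smax(x, MaxC), MinC) is a signed saturate to BW bits, which is
  // what an FP_TO_SINT_SAT with an i8 saturation width produces.
  for (int64_t X : {-1000LL, -128LL, 0LL, 127LL, 1000LL}) {
    int64_t Clamped = std::min(std::max(X, MaxC), MinC);
    std::printf("%lld -> %lld\n", (long long)X, (long long)Clamped);
  }
  return 0;
}

With BW known, PerformMinMaxFpToSatCombine only has to check that the clamped value is an FP_TO_SINT and that the target reports the FP_TO_SINT_SAT conversion as worthwhile at that width.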
@@ -4817,6 +4923,11 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
return DAG.getNode(AltOpcode, DL, VT, N0, N1);
}
+ if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
+ if (SDValue S = PerformMinMaxFpToSatCombine(
+ N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
+ return S;
+
// Simplify the operands using demanded-bits information.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
@@ -9940,9 +10051,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
// If this is a masked load with an all ones mask, we can use a unmasked load.
// FIXME: Can we do this for indexed, compressing, or truncating stores?
- if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) &&
- MST->isUnindexed() && !MST->isCompressingStore() &&
- !MST->isTruncatingStore())
+ if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
+ !MST->isCompressingStore() && !MST->isTruncatingStore())
return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
MST->getBasePtr(), MST->getMemOperand());
@@ -9997,9 +10107,8 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
// If this is a masked load with an all ones mask, we can use a unmasked load.
// FIXME: Can we do this for indexed, expanding, or extending loads?
- if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) &&
- MLD->isUnindexed() && !MLD->isExpandingLoad() &&
- MLD->getExtensionType() == ISD::NON_EXTLOAD) {
+ if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
+ !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
SDValue NewLd = DAG.getLoad(N->getValueType(0), SDLoc(N), MLD->getChain(),
MLD->getBasePtr(), MLD->getMemOperand());
return CombineTo(N, NewLd, NewLd.getValue(1));
@@ -10138,6 +10247,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
return FMinMax;
}
+ if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
+ return S;
+
// If this select has a condition (setcc) with narrower operands than the
// select, try to widen the compare to match the select width.
// TODO: This should be extended to handle any constant.
@@ -15007,7 +15119,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
// fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
- TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
+ TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
LN0->getChain(),
@@ -23034,6 +23146,9 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
}
+ if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
+ return S;
+
return SDValue();
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index c1bb65409282..331e0325aea3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -765,7 +765,7 @@ InstrEmitter::EmitDbgInstrRef(SDDbgValue *SD,
assert(!SD->isVariadic());
SDDbgOperand DbgOperand = SD->getLocationOps()[0];
MDNode *Var = SD->getVariable();
- MDNode *Expr = SD->getExpression();
+ DIExpression *Expr = SD->getExpression();
DebugLoc DL = SD->getDebugLoc();
const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_INSTR_REF);
@@ -775,6 +775,13 @@ InstrEmitter::EmitDbgInstrRef(SDDbgValue *SD,
DbgOperand.getKind() == SDDbgOperand::CONST)
return EmitDbgValueFromSingleOp(SD, VRBaseMap);
+ // Immediately fold any indirectness from the LLVM-IR intrinsic into the
+ // expression:
+ if (SD->isIndirect()) {
+ std::vector<uint64_t> Elts = {dwarf::DW_OP_deref};
+ Expr = DIExpression::append(Expr, Elts);
+ }
+
// It may not be immediately possible to identify the MachineInstr that
// defines a VReg, it can depend for example on the order blocks are
// emitted in. When this happens, or when further analysis is needed later,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index eb9d2286aeb4..08598eeded7a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3553,9 +3553,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// Node.
Tmp1 = Node->getOperand(0);
Tmp2 = Node->getOperand(1);
- if (Tmp2.getOpcode() == ISD::SETCC) {
- Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other,
- Tmp1, Tmp2.getOperand(2),
+ if (Tmp2.getOpcode() == ISD::SETCC &&
+ TLI.isOperationLegalOrCustom(ISD::BR_CC,
+ Tmp2.getOperand(0).getValueType())) {
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1, Tmp2.getOperand(2),
Tmp2.getOperand(0), Tmp2.getOperand(1),
Node->getOperand(2));
} else {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 1f73c9eea104..98312f91d8c0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -28,7 +28,7 @@ using namespace llvm;
static cl::opt<bool>
EnableExpensiveChecks("enable-legalize-types-checking", cl::Hidden);
-/// Do extensive, expensive, sanity checking.
+/// Do extensive, expensive, basic correctness checking.
void DAGTypeLegalizer::PerformExpensiveChecks() {
// If a node is not processed, then none of its values should be mapped by any
// of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues.
@@ -534,7 +534,8 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) {
// The node morphed into a different node. Normally for this to happen
// the original node would have to be marked NewNode. However this can
// in theory momentarily not be the case while ReplaceValueWith is doing
- // its stuff. Mark the original node NewNode to help sanity checking.
+ // its stuff. Mark the original node NewNode to help basic correctness
+ // checking.
N->setNodeId(NewNode);
if (M->getNodeId() != NewNode && M->getNodeId() != Unanalyzed)
// It morphed into a previously analyzed node - nothing more to do.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 539c9cb9c256..7ec2638b1e71 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1820,10 +1820,10 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
else
std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
- unsigned LoSize = MemoryLocation::getSizeOrUnknown(LoMemVT.getStoreSize());
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MLD->getPointerInfo(), MachineMemOperand::MOLoad, LoSize, Alignment,
- MLD->getAAInfo(), MLD->getRanges());
+ MLD->getPointerInfo(), MachineMemOperand::MOLoad,
+ MemoryLocation::UnknownSize, Alignment, MLD->getAAInfo(),
+ MLD->getRanges());
Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, Offset, MaskLo, PassThruLo, LoMemVT,
MMO, MLD->getAddressingMode(), ExtType,
@@ -1837,7 +1837,6 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
// Generate hi masked load.
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
MLD->isExpandingLoad());
- unsigned HiSize = MemoryLocation::getSizeOrUnknown(HiMemVT.getStoreSize());
MachinePointerInfo MPI;
if (LoMemVT.isScalableVector())
@@ -1847,8 +1846,8 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
LoMemVT.getStoreSize().getFixedSize());
MMO = DAG.getMachineFunction().getMachineMemOperand(
- MPI, MachineMemOperand::MOLoad, HiSize, Alignment, MLD->getAAInfo(),
- MLD->getRanges());
+ MPI, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, Alignment,
+ MLD->getAAInfo(), MLD->getRanges());
Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi,
HiMemVT, MMO, MLD->getAddressingMode(), ExtType,
@@ -2662,10 +2661,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
DAG.GetDependentSplitDestVTs(MemoryVT, DataLo.getValueType(), &HiIsEmpty);
SDValue Lo, Hi, Res;
- unsigned LoSize = MemoryLocation::getSizeOrUnknown(LoMemVT.getStoreSize());
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- N->getPointerInfo(), MachineMemOperand::MOStore, LoSize, Alignment,
- N->getAAInfo(), N->getRanges());
+ N->getPointerInfo(), MachineMemOperand::MOStore,
+ MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges());
Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, Offset, MaskLo, LoMemVT, MMO,
N->getAddressingMode(), N->isTruncatingStore(),
@@ -2689,10 +2687,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
MPI = N->getPointerInfo().getWithOffset(
LoMemVT.getStoreSize().getFixedSize());
- unsigned HiSize = MemoryLocation::getSizeOrUnknown(HiMemVT.getStoreSize());
MMO = DAG.getMachineFunction().getMachineMemOperand(
- MPI, MachineMemOperand::MOStore, HiSize, Alignment, N->getAAInfo(),
- N->getRanges());
+ MPI, MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment,
+ N->getAAInfo(), N->getRanges());
Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO,
N->getAddressingMode(), N->isTruncatingStore(),
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index 55fe26eb64cd..2695ed36991c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -268,8 +268,8 @@ bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
// Now see if there are no other dependencies
// to instructions already in the packet.
- for (unsigned i = 0, e = Packet.size(); i != e; ++i)
- for (const SDep &Succ : Packet[i]->Succs) {
+ for (const SUnit *S : Packet)
+ for (const SDep &Succ : S->Succs) {
// Since we do not add pseudos to packets, might as well
// ignore order deps.
if (Succ.isCtrl())
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 95f7e43b151d..84e6d2a16422 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -706,8 +706,8 @@ void ScheduleDAGSDNodes::dump() const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ScheduleDAGSDNodes::dumpSchedule() const {
- for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
- if (SUnit *SU = Sequence[i])
+ for (const SUnit *SU : Sequence) {
+ if (SU)
dumpNode(*SU);
else
dbgs() << "**** NOOP ****\n";
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 008665d50233..c282e03387dd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -406,8 +406,8 @@ bool ISD::isVPOpcode(unsigned Opcode) {
switch (Opcode) {
default:
return false;
-#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) \
- case ISD::SDOPC: \
+#define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) \
+ case ISD::VPSD: \
return true;
#include "llvm/IR/VPIntrinsics.def"
}
@@ -416,23 +416,25 @@ bool ISD::isVPOpcode(unsigned Opcode) {
bool ISD::isVPBinaryOp(unsigned Opcode) {
switch (Opcode) {
default:
- return false;
-#define PROPERTY_VP_BINARYOP_SDNODE(SDOPC) \
- case ISD::SDOPC: \
- return true;
+ break;
+#define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) case ISD::VPSD:
+#define VP_PROPERTY_BINARYOP return true;
+#define END_REGISTER_VP_SDNODE(VPSD) break;
#include "llvm/IR/VPIntrinsics.def"
}
+ return false;
}
bool ISD::isVPReduction(unsigned Opcode) {
switch (Opcode) {
default:
- return false;
-#define PROPERTY_VP_REDUCTION_SDNODE(SDOPC) \
- case ISD::SDOPC: \
- return true;
+ break;
+#define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) case ISD::VPSD:
+#define VP_PROPERTY_REDUCTION(STARTPOS, ...) return true;
+#define END_REGISTER_VP_SDNODE(VPSD) break;
#include "llvm/IR/VPIntrinsics.def"
}
+ return false;
}
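
The rewritten switches above rely on the usual .def-file X-macro trick: the includer defines the BEGIN_/VP_PROPERTY_/END_ hooks, includes llvm/IR/VPIntrinsics.def, and each record expands into a case label, a 'return true;' only if it carries the property, and a closing 'break;'. A self-contained miniature of that mechanism (the record names and the inline MINI_VP_DEF stand-in for the .def file are invented for illustration):

#include <cstdio>

// Stand-in for the .def file: each record expands whatever the includer has
// currently defined for the BEGIN/PROPERTY/END hooks.
#define MINI_VP_DEF                                                            \
  BEGIN_VP_SDNODE(VP_ADD)                                                      \
  VP_PROPERTY_BINARYOP                                                         \
  END_VP_SDNODE(VP_ADD)                                                        \
  BEGIN_VP_SDNODE(VP_REDUCE_ADD)                                               \
  END_VP_SDNODE(VP_REDUCE_ADD)

enum Opcode {
#define BEGIN_VP_SDNODE(SD) SD,
#define VP_PROPERTY_BINARYOP
#define END_VP_SDNODE(SD)
  MINI_VP_DEF
#undef BEGIN_VP_SDNODE
#undef VP_PROPERTY_BINARYOP
#undef END_VP_SDNODE
  LAST_OPCODE
};

static bool isVPBinaryOp(Opcode Opc) {
  switch (Opc) {
  default:
    break;
  // Every record opens a 'case'; only records carrying the BINARYOP property
  // emit 'return true;'; END closes the case with 'break;'.
#define BEGIN_VP_SDNODE(SD) case SD:
#define VP_PROPERTY_BINARYOP return true;
#define END_VP_SDNODE(SD) break;
  MINI_VP_DEF
#undef BEGIN_VP_SDNODE
#undef VP_PROPERTY_BINARYOP
#undef END_VP_SDNODE
  }
  return false;
}

int main() {
  std::printf("VP_ADD binary: %d\n", isVPBinaryOp(VP_ADD));               // 1
  std::printf("VP_REDUCE_ADD binary: %d\n", isVPBinaryOp(VP_REDUCE_ADD)); // 0
  return 0;
}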
/// The operand position of the vector mask.
@@ -440,8 +442,8 @@ Optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) {
switch (Opcode) {
default:
return None;
-#define BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, TDNAME, MASKPOS, ...) \
- case ISD::SDOPC: \
+#define BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, TDNAME, MASKPOS, ...) \
+ case ISD::VPSD: \
return MASKPOS;
#include "llvm/IR/VPIntrinsics.def"
}
@@ -452,8 +454,8 @@ Optional<unsigned> ISD::getVPExplicitVectorLengthIdx(unsigned Opcode) {
switch (Opcode) {
default:
return None;
-#define BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, TDNAME, MASKPOS, EVLPOS) \
- case ISD::SDOPC: \
+#define BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, TDNAME, MASKPOS, EVLPOS) \
+ case ISD::VPSD: \
return EVLPOS;
#include "llvm/IR/VPIntrinsics.def"
}
@@ -974,7 +976,7 @@ void SelectionDAG::DeallocateNode(SDNode *N) {
}
#ifndef NDEBUG
-/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid.
+/// VerifySDNode - Check the given SDNode. Aborts if it is invalid.
static void VerifySDNode(SDNode *N) {
switch (N->getOpcode()) {
default:
@@ -4540,10 +4542,25 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
}
// FIXME: unify with llvm::haveNoCommonBitsSet.
-// FIXME: could also handle masked merge pattern (X & ~M) op (Y & M)
bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
assert(A.getValueType() == B.getValueType() &&
"Values must have the same type");
+ // Match masked merge pattern (X & ~M) op (Y & M)
+ if (A->getOpcode() == ISD::AND && B->getOpcode() == ISD::AND) {
+ auto MatchNoCommonBitsPattern = [&](SDValue NotM, SDValue And) {
+ if (isBitwiseNot(NotM, true)) {
+ SDValue NotOperand = NotM->getOperand(0);
+ return NotOperand == And->getOperand(0) ||
+ NotOperand == And->getOperand(1);
+ }
+ return false;
+ };
+ if (MatchNoCommonBitsPattern(A->getOperand(0), B) ||
+ MatchNoCommonBitsPattern(A->getOperand(1), B) ||
+ MatchNoCommonBitsPattern(B->getOperand(0), A) ||
+ MatchNoCommonBitsPattern(B->getOperand(1), A))
+ return true;
+ }
return KnownBits::haveNoCommonBitsSet(computeKnownBits(A),
computeKnownBits(B));
}
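
Why the structural match is sound: for any X, Y and mask M, (X & ~M) and (Y & M) select disjoint bit positions, so the two AND results can never share a set bit, and for example their ADD equals their OR. An exhaustive 8-bit check of that claim (standalone C++, nothing LLVM-specific):

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  // (X & ~M) keeps only bits where M is 0 and (Y & M) keeps only bits where
  // M is 1, so the operands of the masked-merge pattern never overlap. That
  // is the property haveNoCommonBitsSet() now recognizes structurally.
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned Y = 0; Y < 256; ++Y)
      for (unsigned M = 0; M < 256; ++M) {
        uint8_t A = X & ~M;
        uint8_t B = Y & M;
        assert((A & B) == 0);
        assert((unsigned)(uint8_t)(A + B) == (unsigned)(A | B));
      }
  std::printf("masked-merge operands never share bits\n");
  return 0;
}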
@@ -5070,7 +5087,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return getUNDEF(VT);
break;
case ISD::BITCAST:
- // Basic sanity checking.
assert(VT.getSizeInBits() == Operand.getValueSizeInBits() &&
"Cannot BITCAST between types of different sizes!");
if (VT == Operand.getValueType()) return Operand; // noop conversion.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 5d911c165293..7726a0007e44 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4336,9 +4336,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
- // TODO: Make MachineMemOperands aware of scalable
- // vectors.
- VT.getStoreSize().getKnownMinSize(), *Alignment, I.getAAMetadata());
+ MemoryLocation::UnknownSize, *Alignment, I.getAAMetadata());
SDValue StoreNode =
DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO,
ISD::UNINDEXED, false /* Truncating */, IsCompressing);
@@ -4496,22 +4494,14 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
// Do not serialize masked loads of constant memory with anything.
- MemoryLocation ML;
- if (VT.isScalableVector())
- ML = MemoryLocation::getAfter(PtrOperand);
- else
- ML = MemoryLocation(PtrOperand, LocationSize::precise(
- DAG.getDataLayout().getTypeStoreSize(I.getType())),
- AAInfo);
+ MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
bool AddToChain = !AA || !AA->pointsToConstantMemory(ML);
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
- // TODO: Make MachineMemOperands aware of scalable
- // vectors.
- VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo, Ranges);
+ MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
SDValue Load =
DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO,
@@ -5807,8 +5797,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::vscale: {
match(&I, m_VScale(DAG.getDataLayout()));
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
- setValue(&I,
- DAG.getVScale(getCurSDLoc(), VT, APInt(VT.getSizeInBits(), 1)));
+ setValue(&I, DAG.getVScale(sdl, VT, APInt(VT.getSizeInBits(), 1)));
return;
}
case Intrinsic::vastart: visitVAStart(I); return;
@@ -6942,10 +6931,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT);
unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC);
- SDValue N =
- DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT);
+ SDValue N = DAG.getCopyFromReg(DAG.getEntryNode(), sdl, VReg, PtrVT);
if (Intrinsic == Intrinsic::eh_exceptioncode)
- N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32);
+ N = DAG.getZExtOrTrunc(N, sdl, MVT::i32);
setValue(&I, N);
return;
}
@@ -6957,7 +6945,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
if (Triple.getArch() != Triple::x86_64)
return;
- SDLoc DL = getCurSDLoc();
SmallVector<SDValue, 8> Ops;
// We want to say that we always want the arguments in registers.
@@ -6974,7 +6961,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// see that some registers may be assumed clobbered and have to preserve
// them across calls to the intrinsic.
MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHABLE_EVENT_CALL,
- DL, NodeTys, Ops);
+ sdl, NodeTys, Ops);
SDValue patchableNode = SDValue(MN, 0);
DAG.setRoot(patchableNode);
setValue(&I, patchableNode);
@@ -6988,7 +6975,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
if (Triple.getArch() != Triple::x86_64)
return;
- SDLoc DL = getCurSDLoc();
SmallVector<SDValue, 8> Ops;
// We want to say that we always want the arguments in registers.
@@ -7009,7 +6995,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// see that some registers may be assumed clobbered and have to preserve
// them across calls to the intrinsic.
MachineSDNode *MN = DAG.getMachineNode(
- TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, DL, NodeTys, Ops);
+ TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, sdl, NodeTys, Ops);
SDValue patchableNode = SDValue(MN, 0);
DAG.setRoot(patchableNode);
setValue(&I, patchableNode);
@@ -7047,7 +7033,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
if (!Base)
report_fatal_error(
"llvm.icall.branch.funnel operand must be a GlobalValue");
- Ops.push_back(DAG.getTargetGlobalAddress(Base, getCurSDLoc(), MVT::i64, 0));
+ Ops.push_back(DAG.getTargetGlobalAddress(Base, sdl, MVT::i64, 0));
struct BranchFunnelTarget {
int64_t Offset;
@@ -7068,8 +7054,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
report_fatal_error(
"llvm.icall.branch.funnel operand must be a GlobalValue");
Targets.push_back({Offset, DAG.getTargetGlobalAddress(
- GA->getGlobal(), getCurSDLoc(),
- Val.getValueType(), GA->getOffset())});
+ GA->getGlobal(), sdl, Val.getValueType(),
+ GA->getOffset())});
}
llvm::sort(Targets,
[](const BranchFunnelTarget &T1, const BranchFunnelTarget &T2) {
@@ -7077,13 +7063,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
});
for (auto &T : Targets) {
- Ops.push_back(DAG.getTargetConstant(T.Offset, getCurSDLoc(), MVT::i32));
+ Ops.push_back(DAG.getTargetConstant(T.Offset, sdl, MVT::i32));
Ops.push_back(T.Target);
}
Ops.push_back(DAG.getRoot()); // Chain
- SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL,
- getCurSDLoc(), MVT::Other, Ops),
+ SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL, sdl,
+ MVT::Other, Ops),
0);
DAG.setRoot(N);
setValue(&I, N);
@@ -7102,7 +7088,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
bool ZeroMemory = Intrinsic == Intrinsic::aarch64_settag_zero;
SDValue Val = TSI.EmitTargetCodeForSetTag(
- DAG, getCurSDLoc(), getRoot(), getValue(I.getArgOperand(0)),
+ DAG, sdl, getRoot(), getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), MachinePointerInfo(I.getArgOperand(0)),
ZeroMemory);
DAG.setRoot(Val);
@@ -7114,46 +7100,42 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDValue Const = getValue(I.getOperand(1));
EVT PtrVT = Ptr.getValueType();
- setValue(&I, DAG.getNode(ISD::AND, getCurSDLoc(), PtrVT, Ptr,
- DAG.getZExtOrTrunc(Const, getCurSDLoc(), PtrVT)));
+ setValue(&I, DAG.getNode(ISD::AND, sdl, PtrVT, Ptr,
+ DAG.getZExtOrTrunc(Const, sdl, PtrVT)));
return;
}
case Intrinsic::get_active_lane_mask: {
- auto DL = getCurSDLoc();
+ EVT CCVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
SDValue Index = getValue(I.getOperand(0));
- SDValue TripCount = getValue(I.getOperand(1));
- Type *ElementTy = I.getOperand(0)->getType();
- EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
- unsigned VecWidth = VT.getVectorNumElements();
+ EVT ElementVT = Index.getValueType();
- SmallVector<SDValue, 16> OpsTripCount;
- SmallVector<SDValue, 16> OpsIndex;
- SmallVector<SDValue, 16> OpsStepConstants;
- for (unsigned i = 0; i < VecWidth; i++) {
- OpsTripCount.push_back(TripCount);
- OpsIndex.push_back(Index);
- OpsStepConstants.push_back(
- DAG.getConstant(i, DL, EVT::getEVT(ElementTy)));
+ if (!TLI.shouldExpandGetActiveLaneMask(CCVT, ElementVT)) {
+ visitTargetIntrinsic(I, Intrinsic);
+ return;
}
- EVT CCVT = EVT::getVectorVT(I.getContext(), MVT::i1, VecWidth);
+ SDValue TripCount = getValue(I.getOperand(1));
+ auto VecTy = CCVT.changeVectorElementType(ElementVT);
- auto VecTy = EVT::getEVT(FixedVectorType::get(ElementTy, VecWidth));
- SDValue VectorIndex = DAG.getBuildVector(VecTy, DL, OpsIndex);
- SDValue VectorStep = DAG.getBuildVector(VecTy, DL, OpsStepConstants);
+ SDValue VectorIndex, VectorTripCount;
+ if (VecTy.isScalableVector()) {
+ VectorIndex = DAG.getSplatVector(VecTy, sdl, Index);
+ VectorTripCount = DAG.getSplatVector(VecTy, sdl, TripCount);
+ } else {
+ VectorIndex = DAG.getSplatBuildVector(VecTy, sdl, Index);
+ VectorTripCount = DAG.getSplatBuildVector(VecTy, sdl, TripCount);
+ }
+ SDValue VectorStep = DAG.getStepVector(sdl, VecTy);
SDValue VectorInduction = DAG.getNode(
- ISD::UADDO, DL, DAG.getVTList(VecTy, CCVT), VectorIndex, VectorStep);
- SDValue VectorTripCount = DAG.getBuildVector(VecTy, DL, OpsTripCount);
- SDValue SetCC = DAG.getSetCC(DL, CCVT, VectorInduction.getValue(0),
+ ISD::UADDO, sdl, DAG.getVTList(VecTy, CCVT), VectorIndex, VectorStep);
+ SDValue SetCC = DAG.getSetCC(sdl, CCVT, VectorInduction.getValue(0),
VectorTripCount, ISD::CondCode::SETULT);
- setValue(&I, DAG.getNode(ISD::AND, DL, CCVT,
- DAG.getNOT(DL, VectorInduction.getValue(1), CCVT),
+ setValue(&I, DAG.getNode(ISD::AND, sdl, CCVT,
+ DAG.getNOT(sdl, VectorInduction.getValue(1), CCVT),
SetCC));
return;
}
case Intrinsic::experimental_vector_insert: {
- auto DL = getCurSDLoc();
-
SDValue Vec = getValue(I.getOperand(0));
SDValue SubVec = getValue(I.getOperand(1));
SDValue Index = getValue(I.getOperand(2));
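
The get_active_lane_mask expansion in the hunk above computes, per lane i, whether Index + i stays below TripCount, using the UADDO overflow bit to switch off any lane whose index computation wrapped. A scalar model of the same computation (standalone C++; the 16-lane width and the sample Index/TripCount values are made up):

#include <cstdint>
#include <cstdio>

// Scalar model of the expanded llvm.get.active.lane.mask: splat Index, add
// the step vector <0,1,2,...>, and compare against the splatted TripCount,
// with the UADDO overflow bit masking off wrapped lanes.
static void activeLaneMask(uint64_t Index, uint64_t TripCount,
                           unsigned NumLanes, bool *Mask) {
  for (unsigned I = 0; I < NumLanes; ++I) {
    uint64_t Sum = Index + I;               // vector UADDO result value
    bool Overflow = Sum < Index;            // UADDO overflow bit for this lane
    Mask[I] = !Overflow && Sum < TripCount; // AND(NOT(overflow), SETULT)
  }
}

int main() {
  bool Mask[16];
  activeLaneMask(/*Index=*/10, /*TripCount=*/13, /*NumLanes=*/16, Mask);
  for (unsigned I = 0; I < 16; ++I)
    std::printf("%d", Mask[I] ? 1 : 0); // 1110000000000000
  std::printf("\n");
  return 0;
}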
@@ -7163,16 +7145,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
if (Index.getValueType() != VectorIdxTy)
Index = DAG.getVectorIdxConstant(
- cast<ConstantSDNode>(Index)->getZExtValue(), DL);
+ cast<ConstantSDNode>(Index)->getZExtValue(), sdl);
EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
- setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResultVT, Vec, SubVec,
+ setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, ResultVT, Vec, SubVec,
Index));
return;
}
case Intrinsic::experimental_vector_extract: {
- auto DL = getCurSDLoc();
-
SDValue Vec = getValue(I.getOperand(0));
SDValue Index = getValue(I.getOperand(1));
EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
@@ -7182,9 +7162,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
if (Index.getValueType() != VectorIdxTy)
Index = DAG.getVectorIdxConstant(
- cast<ConstantSDNode>(Index)->getZExtValue(), DL);
+ cast<ConstantSDNode>(Index)->getZExtValue(), sdl);
- setValue(&I, DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, Index));
+ setValue(&I,
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, ResultVT, Vec, Index));
return;
}
case Intrinsic::experimental_vector_reverse:
@@ -7314,9 +7295,9 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
Optional<unsigned> ResOPC;
switch (VPIntrin.getIntrinsicID()) {
-#define BEGIN_REGISTER_VP_INTRINSIC(INTRIN, ...) case Intrinsic::INTRIN:
-#define BEGIN_REGISTER_VP_SDNODE(VPSDID, ...) ResOPC = ISD::VPSDID;
-#define END_REGISTER_VP_INTRINSIC(...) break;
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
+#define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) ResOPC = ISD::VPSD;
+#define END_REGISTER_VP_INTRINSIC(VPID) break;
#include "llvm/IR/VPIntrinsics.def"
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index e4a69adff05b..737695b5eabe 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -645,6 +645,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
if (DemandedBits == 0 || DemandedElts == 0)
return DAG.getUNDEF(Op.getValueType());
+ bool IsLE = DAG.getDataLayout().isLittleEndian();
unsigned NumElts = DemandedElts.getBitWidth();
unsigned BitWidth = DemandedBits.getBitWidth();
KnownBits LHSKnown, RHSKnown;
@@ -663,16 +664,15 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
Src, DemandedBits, DemandedElts, DAG, Depth + 1))
return DAG.getBitcast(DstVT, V);
- // TODO - bigendian once we have test coverage.
- if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 &&
- DAG.getDataLayout().isLittleEndian()) {
+ if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
unsigned Scale = NumDstEltBits / NumSrcEltBits;
unsigned NumSrcElts = SrcVT.getVectorNumElements();
APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
for (unsigned i = 0; i != Scale; ++i) {
- unsigned Offset = i * NumSrcEltBits;
- APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
+ unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
+ unsigned BitOffset = EltOffset * NumSrcEltBits;
+ APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
if (!Sub.isZero()) {
DemandedSrcBits |= Sub;
for (unsigned j = 0; j != NumElts; ++j)
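
The EltOffset change above is what makes the bitcast case endian-correct: when a wide element is assembled from Scale narrow source elements, bit range [i*NumSrcEltBits, (i+1)*NumSrcEltBits) of the wide value comes from source element i on little-endian targets but from element Scale-1-i on big-endian ones. A small host-endianness probe illustrating that index mapping (standalone C++ with made-up byte values, shown for the 4 x i8 -> i32 case):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // A "vector" of 4 narrow elements, element 0 first in memory, bitcast to a
  // single wide 32-bit element by reinterpreting the same bytes.
  const uint8_t Elts[4] = {0x11, 0x22, 0x33, 0x44};
  uint32_t Wide;
  std::memcpy(&Wide, Elts, sizeof(Wide));

  const unsigned Scale = 4, NumSrcEltBits = 8;
  const bool IsLE = (Wide & 0xff) == 0x11; // did the low bits come from elt 0?

  for (unsigned I = 0; I != Scale; ++I) {
    // Demanded bits [I*8, I*8+8) of the wide value ...
    unsigned Byte = (Wide >> (I * NumSrcEltBits)) & 0xff;
    // ... live in source element I on little-endian, Scale-1-I on big-endian.
    unsigned EltOffset = IsLE ? I : (Scale - 1 - I);
    std::printf("wide bits [%u,%u) = 0x%02x, from element %u\n",
                I * NumSrcEltBits, (I + 1) * NumSrcEltBits, Byte, EltOffset);
  }
  return 0;
}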
@@ -687,8 +687,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
}
// TODO - bigendian once we have test coverage.
- if ((NumSrcEltBits % NumDstEltBits) == 0 &&
- DAG.getDataLayout().isLittleEndian()) {
+ if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
unsigned Scale = NumSrcEltBits / NumDstEltBits;
unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
@@ -802,8 +801,8 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
EVT DstVT = Op.getValueType();
- if (DemandedElts == 1 && DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
- DAG.getDataLayout().isLittleEndian() &&
+ if (IsLE && DemandedElts == 1 &&
+ DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
return DAG.getBitcast(DstVT, Src);
}
@@ -913,6 +912,7 @@ bool TargetLowering::SimplifyDemandedBits(
if (Op.getValueType().isScalableVector())
return false;
+ bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
unsigned NumElts = OriginalDemandedElts.getBitWidth();
assert((!Op.getValueType().isVector() ||
NumElts == Op.getValueType().getVectorNumElements()) &&
@@ -1725,11 +1725,40 @@ bool TargetLowering::SimplifyDemandedBits(
case ISD::ROTR: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
+ bool IsROTL = (Op.getOpcode() == ISD::ROTL);
// If we're rotating an 0/-1 value, then it stays an 0/-1 value.
if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
return TLO.CombineTo(Op, Op0);
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
+ unsigned Amt = SA->getAPIntValue().urem(BitWidth);
+ unsigned RevAmt = BitWidth - Amt;
+
+ // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
+ // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
+ APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
+ if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
+ Depth + 1))
+ return true;
+
+ // rot*(x, 0) --> x
+ if (Amt == 0)
+ return TLO.CombineTo(Op, Op0);
+
+ // See if we don't demand either half of the rotated bits.
+ if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
+ DemandedBits.countTrailingZeros() >= (IsROTL ? Amt : RevAmt)) {
+ Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
+ }
+ if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
+ DemandedBits.countLeadingZeros() >= (IsROTL ? RevAmt : Amt)) {
+ Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
+ }
+ }
+
// For pow-2 bitwidths we only demand the bottom modulo amt bits.
if (isPowerOf2_32(BitWidth)) {
APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
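
The new rotate handling is justified by rotl(x, Amt) == (x << Amt) | (x >> (BW - Amt)): when every demanded bit sits at position >= Amt (that is, countTrailingZeros() >= Amt), the shift-right half cannot contribute, so the rotate collapses to a plain SHL; the SRL replacement is the mirror image. An exhaustive 8-bit check of the SHL claim (standalone C++, not LLVM code):

#include <cassert>
#include <cstdint>
#include <cstdio>

static uint8_t rotl8(uint8_t X, unsigned Amt) {
  Amt &= 7;
  return Amt ? (uint8_t)((X << Amt) | (X >> (8 - Amt))) : X;
}

int main() {
  const unsigned BW = 8;
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned Amt = 1; Amt < BW; ++Amt)
      for (unsigned TZ = Amt; TZ <= BW; ++TZ) {
        // A demanded-bits mask whose low TZ bits are all zero (TZ >= Amt),
        // i.e. DemandedBits.countTrailingZeros() >= Amt.
        uint8_t Demanded = (uint8_t)(0xffu << TZ);
        // The (X >> (BW - Amt)) half only produces bits below Amt, so under
        // this mask rotl is indistinguishable from a plain shift left.
        assert((rotl8((uint8_t)X, Amt) & Demanded) ==
               ((uint8_t)(X << Amt) & Demanded));
      }
  std::printf("rotl == shl under masks with countTrailingZeros() >= Amt\n");
  return 0;
}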
@@ -1887,9 +1916,8 @@ bool TargetLowering::SimplifyDemandedBits(
if (DemandedBits.getActiveBits() <= InBits) {
// If we only need the non-extended bits of the bottom element
// then we can just bitcast to the result.
- if (IsVecInReg && DemandedElts == 1 &&
- VT.getSizeInBits() == SrcVT.getSizeInBits() &&
- TLO.DAG.getDataLayout().isLittleEndian())
+ if (IsLE && IsVecInReg && DemandedElts == 1 &&
+ VT.getSizeInBits() == SrcVT.getSizeInBits())
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
unsigned Opc =
@@ -1925,9 +1953,8 @@ bool TargetLowering::SimplifyDemandedBits(
if (DemandedBits.getActiveBits() <= InBits) {
// If we only need the non-extended bits of the bottom element
// then we can just bitcast to the result.
- if (IsVecInReg && DemandedElts == 1 &&
- VT.getSizeInBits() == SrcVT.getSizeInBits() &&
- TLO.DAG.getDataLayout().isLittleEndian())
+ if (IsLE && IsVecInReg && DemandedElts == 1 &&
+ VT.getSizeInBits() == SrcVT.getSizeInBits())
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
unsigned Opc =
@@ -1976,9 +2003,8 @@ bool TargetLowering::SimplifyDemandedBits(
// If we only need the bottom element then we can just bitcast.
// TODO: Handle ANY_EXTEND?
- if (IsVecInReg && DemandedElts == 1 &&
- VT.getSizeInBits() == SrcVT.getSizeInBits() &&
- TLO.DAG.getDataLayout().isLittleEndian())
+ if (IsLE && IsVecInReg && DemandedElts == 1 &&
+ VT.getSizeInBits() == SrcVT.getSizeInBits())
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
APInt InDemandedBits = DemandedBits.trunc(InBits);
@@ -2140,16 +2166,15 @@ bool TargetLowering::SimplifyDemandedBits(
// Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
// Demand the elt/bit if any of the original elts/bits are demanded.
- // TODO - bigendian once we have test coverage.
- if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 &&
- TLO.DAG.getDataLayout().isLittleEndian()) {
+ if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
unsigned Scale = BitWidth / NumSrcEltBits;
unsigned NumSrcElts = SrcVT.getVectorNumElements();
APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
for (unsigned i = 0; i != Scale; ++i) {
- unsigned Offset = i * NumSrcEltBits;
- APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
+ unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
+ unsigned BitOffset = EltOffset * NumSrcEltBits;
+ APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
if (!Sub.isZero()) {
DemandedSrcBits |= Sub;
for (unsigned j = 0; j != NumElts; ++j)
@@ -2167,8 +2192,8 @@ bool TargetLowering::SimplifyDemandedBits(
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
KnownSrcBits, TLO, Depth + 1))
return true;
- } else if ((NumSrcEltBits % BitWidth) == 0 &&
- TLO.DAG.getDataLayout().isLittleEndian()) {
+ } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
+ // TODO - bigendian once we have test coverage.
unsigned Scale = NumSrcEltBits / BitWidth;
unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
@@ -2409,6 +2434,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
SDLoc DL(Op);
unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
// Helper for demanding the specified elements and all the bits of both binary
// operands.
@@ -2484,7 +2510,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// Try calling SimplifyDemandedBits, converting demanded elts to the bits
// of the large element.
// TODO - bigendian once we have test coverage.
- if (TLO.DAG.getDataLayout().isLittleEndian()) {
+ if (IsLE) {
unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
for (unsigned i = 0; i != NumElts; ++i)
@@ -2797,9 +2823,9 @@ bool TargetLowering::SimplifyDemandedVectorElts(
KnownZero = SrcZero.zextOrTrunc(NumElts);
KnownUndef = SrcUndef.zextOrTrunc(NumElts);
- if (Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
+ if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
- DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian()) {
+ DemandedSrcElts == 1) {
// aext - if we just need the bottom element then we can bitcast.
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
}
@@ -2812,8 +2838,8 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// zext - if we just need the bottom element then we can mask:
// zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
- if (DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian() &&
- Src.getOpcode() == ISD::AND && Op->isOnlyUserOf(Src.getNode()) &&
+ if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
+ Op->isOnlyUserOf(Src.getNode()) &&
Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
SDLoc DL(Op);
EVT SrcVT = Src.getValueType();
@@ -2834,9 +2860,19 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// TODO: There are more binop opcodes that could be handled here - MIN,
// MAX, saturated math, etc.
+ case ISD::ADD: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
+ APInt UndefLHS, ZeroLHS;
+ if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
+ Depth + 1, /*AssumeSingleUse*/ true))
+ return true;
+ }
+ LLVM_FALLTHROUGH;
+ }
case ISD::OR:
case ISD::XOR:
- case ISD::ADD:
case ISD::SUB:
case ISD::FADD:
case ISD::FSUB:
@@ -5586,7 +5622,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
.multiplicativeInverse(APInt::getSignedMinValue(W + 1))
.trunc(W);
assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
- assert((D0 * P).isOne() && "Multiplicative inverse sanity check.");
+ assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
// Q = floor((2^W - 1) u/ D)
// R = ((2^W - 1) u% D)
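
For context on the reworded assertion: P is the multiplicative inverse of the odd factor D0 modulo 2^W, so the wrapped product D0 * P must be exactly 1. A standalone W = 32 sketch that computes such an inverse by Newton-Raphson (a different method than APInt::multiplicativeInverse; the sample divisors are arbitrary) and re-checks the same property:

#include <cassert>
#include <cstdint>
#include <cstdio>

// Inverse of an odd D0 modulo 2^32 via Newton-Raphson: each step doubles the
// number of correct low bits, so five steps are more than enough for 32 bits.
static uint32_t inverseMod2_32(uint32_t D0) {
  assert(D0 & 1u);
  uint32_t P = D0;              // already correct to 3 bits for odd D0
  for (int I = 0; I < 5; ++I)
    P *= 2u - D0 * P;
  return P;
}

int main() {
  for (uint32_t D0 : {3u, 7u, 25u, 641u, 0xdeadbeefu}) {
    uint32_t P = inverseMod2_32(D0);
    assert(D0 * P == 1u);       // the '(D0 * P).isOne()' condition above
    std::printf("inverse of %u mod 2^32 is %u\n", D0, P);
  }
  return 0;
}

The identical assertion is updated again in prepareSREMEqFold below.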
@@ -5832,7 +5868,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
.multiplicativeInverse(APInt::getSignedMinValue(W + 1))
.trunc(W);
assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
- assert((D0 * P).isOne() && "Multiplicative inverse sanity check.");
+ assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
// A = floor((2^(W - 1) - 1) / D0) & -2^K
APInt A = APInt::getSignedMaxValue(W).udiv(D0);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
index 9aea5a7a8853..f49ba5ccd447 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -159,8 +159,7 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {
// FIXME: Need the equivalent of MachineRegisterInfo for frameindex operands.
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isFI())
continue;
int FI = MO.getIndex();
@@ -394,8 +393,7 @@ void StackSlotColoring::RewriteInstruction(MachineInstr &MI,
SmallVectorImpl<int> &SlotMapping,
MachineFunction &MF) {
// Update the operands.
- for (unsigned i = 0, ee = MI.getNumOperands(); i != ee; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (MachineOperand &MO : MI.operands()) {
if (!MO.isFI())
continue;
int OldFI = MO.getIndex();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
index 943bd18c6c8b..54fc6ee45d00 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -70,12 +70,6 @@ static cl::opt<unsigned> TailDupIndirectBranchSize(
"end with indirect branches."), cl::init(20),
cl::Hidden);
-static cl::opt<unsigned> TailDupJmpTableLoopSize(
- "tail-dup-jmptable-loop-size",
- cl::desc("Maximum loop latches to consider tail duplication that are "
- "successors of loop header."),
- cl::init(128), cl::Hidden);
-
static cl::opt<bool>
TailDupVerify("tail-dup-verify",
cl::desc("Verify sanity of PHI instructions during taildup"),
@@ -569,29 +563,6 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
if (TailBB.isSuccessor(&TailBB))
return false;
- // When doing tail-duplication with jumptable loops like:
- // 1 -> 2 <-> 3 |
- // \ <-> 4 |
- // \ <-> 5 |
- // \ <-> ... |
- // \---> rest |
- // quadratic number of edges and much more loops are added to CFG. This
- // may cause compile time regression when jumptable is quiet large.
- // So set the limit on jumptable cases.
- auto isLargeJumpTableLoop = [](const MachineBasicBlock &TailBB) {
- const SmallPtrSet<const MachineBasicBlock *, 8> Preds(TailBB.pred_begin(),
- TailBB.pred_end());
- // Check the basic block has large number of successors, all of them only
- // have one successor which is the basic block itself.
- return llvm::count_if(
- TailBB.successors(), [&](const MachineBasicBlock *SuccBB) {
- return Preds.count(SuccBB) && SuccBB->succ_size() == 1;
- }) > TailDupJmpTableLoopSize;
- };
-
- if (isLargeJumpTableLoop(TailBB))
- return false;
-
// Set the limit on the cost to duplicate. When optimizing for size,
// duplicate only one, because one branch instruction can be eliminated to
// compensate for the duplication.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index b0594ec086b2..fbf190a52585 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -136,6 +136,16 @@ unsigned TargetFrameLowering::getStackAlignmentSkew(
return 0;
}
+bool TargetFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
+ const MachineFunction &MF) const {
+ if (!hasFP(MF))
+ return false;
+
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+ return RegInfo->useFPForScavengingIndex(MF) &&
+ !RegInfo->hasStackRealignment(MF);
+}
+
bool TargetFrameLowering::isSafeForNoCSROpt(const Function &F) {
if (!F.hasLocalLinkage() || F.hasAddressTaken() ||
!F.hasFnAttribute(Attribute::NoRecurse))
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
index e74b3195a130..5119dac36713 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -957,8 +957,7 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
// If any of the registers accessed are non-constant, conservatively assume
// the instruction is not rematerializable.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg()) continue;
Register Reg = MO.getReg();
if (Reg == 0)
@@ -1401,3 +1400,21 @@ std::string TargetInstrInfo::createMIROperandComment(
}
TargetInstrInfo::PipelinerLoopInfo::~PipelinerLoopInfo() {}
+
+void TargetInstrInfo::mergeOutliningCandidateAttributes(
+ Function &F, std::vector<outliner::Candidate> &Candidates) const {
+ // Include target features from an arbitrary candidate for the outlined
+ // function. This makes sure the outlined function knows what kinds of
+ // instructions are going into it. This is fine, since all parent functions
+ // must necessarily support the instructions that are in the outlined region.
+ outliner::Candidate &FirstCand = Candidates.front();
+ const Function &ParentFn = FirstCand.getMF()->getFunction();
+ if (ParentFn.hasFnAttribute("target-features"))
+ F.addFnAttr(ParentFn.getFnAttribute("target-features"));
+
+ // Set nounwind, so we don't generate eh_frame.
+ if (llvm::all_of(Candidates, [](const outliner::Candidate &C) {
+ return C.getMF()->getFunction().hasFnAttribute(Attribute::NoUnwind);
+ }))
+ F.addFnAttr(Attribute::NoUnwind);
+}
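
A sketch of the merge policy the new mergeOutliningCandidateAttributes() hook implements, reduced to plain data so it runs standalone (the CandidateInfo/OutlinedFn structs and their fields are invented stand-ins, not the outliner API): take target-features from any one candidate's parent function, and mark the outlined function nounwind only if every parent is nounwind.

#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>

struct CandidateInfo {          // stand-in for an outlining candidate's parent
  std::string TargetFeatures;
  bool NoUnwind;
};

struct OutlinedFn {
  std::string TargetFeatures;
  bool NoUnwind = false;
};

static void mergeCandidateAttributes(OutlinedFn &F,
                                     const std::vector<CandidateInfo> &Cands) {
  // Any candidate works for target-features: every parent function must
  // already support the instructions that ended up in the outlined region.
  F.TargetFeatures = Cands.front().TargetFeatures;
  // nounwind (and hence no eh_frame) is only safe if all parents agree.
  F.NoUnwind = std::all_of(Cands.begin(), Cands.end(),
                           [](const CandidateInfo &C) { return C.NoUnwind; });
}

int main() {
  std::vector<CandidateInfo> Cands = {{"+neon", true}, {"+neon,+sve", false}};
  OutlinedFn F;
  mergeCandidateAttributes(F, Cands);
  std::printf("features=%s nounwind=%d\n", F.TargetFeatures.c_str(),
              F.NoUnwind); // features=+neon nounwind=0
  return 0;
}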
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 1d3bb286c882..d1c2cdeb133b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -1082,7 +1082,7 @@ const MCExpr *TargetLoweringObjectFileELF::lowerRelativeReference(
if (!LHS->hasGlobalUnnamedAddr() || !LHS->getValueType()->isFunctionTy())
return nullptr;
- // Basic sanity checks.
+ // Basic correctness checks.
if (LHS->getType()->getPointerAddressSpace() != 0 ||
RHS->getType()->getPointerAddressSpace() != 0 || LHS->isThreadLocal() ||
RHS->isThreadLocal())
@@ -2135,7 +2135,7 @@ const MCExpr *TargetLoweringObjectFileWasm::lowerRelativeReference(
if (!LHS->hasGlobalUnnamedAddr() || !LHS->getValueType()->isFunctionTy())
return nullptr;
- // Basic sanity checks.
+ // Basic correctness checks.
if (LHS->getType()->getPointerAddressSpace() != 0 ||
RHS->getType()->getPointerAddressSpace() != 0 || LHS->isThreadLocal() ||
RHS->isThreadLocal())
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 46cec5407565..dfd962be2882 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -373,19 +373,25 @@ static bool isTwoAddrUse(MachineInstr &MI, Register Reg, Register &DstReg) {
return false;
}
-/// Given a register, if has a single in-basic block use, return the use
-/// instruction if it's a copy or a two-address use.
+/// Given a register, if all its uses are in the same basic block, return the
+/// last use instruction if it's a copy or a two-address use.
static MachineInstr *
findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB,
MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
- bool &IsCopy, Register &DstReg, bool &IsDstPhys) {
- if (!MRI->hasOneNonDBGUse(Reg))
- // None or more than one use.
- return nullptr;
- MachineOperand &UseOp = *MRI->use_nodbg_begin(Reg);
- MachineInstr &UseMI = *UseOp.getParent();
- if (UseMI.getParent() != MBB)
+ bool &IsCopy, Register &DstReg, bool &IsDstPhys,
+ LiveIntervals *LIS) {
+ MachineOperand *UseOp = nullptr;
+ for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
+ MachineInstr *MI = MO.getParent();
+ if (MI->getParent() != MBB)
+ return nullptr;
+ if (isPlainlyKilled(MI, Reg, LIS))
+ UseOp = &MO;
+ }
+ if (!UseOp)
return nullptr;
+ MachineInstr &UseMI = *UseOp->getParent();
+
Register SrcReg;
bool IsSrcPhys;
if (isCopyToReg(UseMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) {
@@ -399,7 +405,7 @@ findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB,
}
if (UseMI.isCommutable()) {
unsigned Src1 = TargetInstrInfo::CommuteAnyOperandIndex;
- unsigned Src2 = UseMI.getOperandNo(&UseOp);
+ unsigned Src2 = UseMI.getOperandNo(UseOp);
if (TII->findCommutedOpIndices(UseMI, Src1, Src2)) {
MachineOperand &MO = UseMI.getOperand(Src1);
if (MO.isReg() && MO.isUse() &&
@@ -492,8 +498,7 @@ void TwoAddressInstructionPass::removeClobberedSrcRegMap(MachineInstr *MI) {
return;
}
- for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (MO.isRegMask()) {
removeMapRegEntry(MO, SrcRegMap, TRI);
continue;
@@ -685,7 +690,6 @@ bool TwoAddressInstructionPass::convertInstTo3Addr(
// If the old instruction is debug value tracked, an update is required.
if (auto OldInstrNum = mi->peekDebugInstrNum()) {
- // Sanity check.
assert(mi->getNumExplicitDefs() == 1);
assert(NewMI->getNumExplicitDefs() == 1);
@@ -724,7 +728,7 @@ void TwoAddressInstructionPass::scanUses(Register DstReg) {
Register NewReg;
Register Reg = DstReg;
while (MachineInstr *UseMI = findOnlyInterestingUse(Reg, MBB, MRI, TII,IsCopy,
- NewReg, IsDstPhys)) {
+ NewReg, IsDstPhys, LIS)) {
if (IsCopy && !Processed.insert(UseMI).second)
break;
@@ -1336,8 +1340,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
// Success, or at least we made an improvement. Keep the unfolded
// instructions and discard the original.
if (LV) {
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.getReg().isVirtual()) {
if (MO.isUse()) {
if (MO.isKill()) {
diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
index fb0798f204e1..7673a721c4ea 100644
--- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
+++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
@@ -15,6 +15,7 @@
#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/DebugInfo/DWARF/DWARFSection.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
#include "llvm/Support/DJB.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/WithColor.h"
@@ -317,12 +318,33 @@ bool DWARFVerifier::handleDebugAbbrev() {
return NumErrors == 0;
}
-unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S,
- DWARFSectionKind SectionKind) {
+unsigned DWARFVerifier::verifyUnits(const DWARFUnitVector &Units) {
+ unsigned NumDebugInfoErrors = 0;
+ ReferenceMap CrossUnitReferences;
+
+ for (const auto &Unit : Units) {
+ ReferenceMap UnitLocalReferences;
+ NumDebugInfoErrors +=
+ verifyUnitContents(*Unit, UnitLocalReferences, CrossUnitReferences);
+ NumDebugInfoErrors += verifyDebugInfoReferences(
+ UnitLocalReferences, [&](uint64_t Offset) { return Unit.get(); });
+ }
+
+ NumDebugInfoErrors += verifyDebugInfoReferences(
+ CrossUnitReferences, [&](uint64_t Offset) -> DWARFUnit * {
+ if (DWARFUnit *U = Units.getUnitForOffset(Offset))
+ return U;
+ return nullptr;
+ });
+
+ return NumDebugInfoErrors;
+}
+
+unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S) {
const DWARFObject &DObj = DCtx.getDWARFObj();
DWARFDataExtractor DebugInfoData(DObj, S, DCtx.isLittleEndian(), 0);
unsigned NumDebugInfoErrors = 0;
- uint64_t OffsetStart = 0, Offset = 0, UnitIdx = 0;
+ uint64_t Offset = 0, UnitIdx = 0;
uint8_t UnitType = 0;
bool isUnitDWARF64 = false;
bool isHeaderChainValid = true;
@@ -334,48 +356,11 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S,
/// lies between to valid DIEs.
ReferenceMap CrossUnitReferences;
while (hasDIE) {
- OffsetStart = Offset;
if (!verifyUnitHeader(DebugInfoData, &Offset, UnitIdx, UnitType,
isUnitDWARF64)) {
isHeaderChainValid = false;
if (isUnitDWARF64)
break;
- } else {
- DWARFUnitHeader Header;
- Header.extract(DCtx, DebugInfoData, &OffsetStart, SectionKind);
- ReferenceMap UnitLocalReferences;
- DWARFUnit *Unit;
- switch (UnitType) {
- case dwarf::DW_UT_type:
- case dwarf::DW_UT_split_type: {
- Unit = TypeUnitVector.addUnit(std::make_unique<DWARFTypeUnit>(
- DCtx, S, Header, DCtx.getDebugAbbrev(), &DObj.getRangesSection(),
- &DObj.getLocSection(), DObj.getStrSection(),
- DObj.getStrOffsetsSection(), &DObj.getAddrSection(),
- DObj.getLineSection(), DCtx.isLittleEndian(), false,
- TypeUnitVector));
- break;
- }
- case dwarf::DW_UT_skeleton:
- case dwarf::DW_UT_split_compile:
- case dwarf::DW_UT_compile:
- case dwarf::DW_UT_partial:
- // UnitType = 0 means that we are verifying a compile unit in DWARF v4.
- case 0: {
- Unit = CompileUnitVector.addUnit(std::make_unique<DWARFCompileUnit>(
- DCtx, S, Header, DCtx.getDebugAbbrev(), &DObj.getRangesSection(),
- &DObj.getLocSection(), DObj.getStrSection(),
- DObj.getStrOffsetsSection(), &DObj.getAddrSection(),
- DObj.getLineSection(), DCtx.isLittleEndian(), false,
- CompileUnitVector));
- break;
- }
- default: { llvm_unreachable("Invalid UnitType."); }
- }
- NumDebugInfoErrors +=
- verifyUnitContents(*Unit, UnitLocalReferences, CrossUnitReferences);
- NumDebugInfoErrors += verifyDebugInfoReferences(
- UnitLocalReferences, [&](uint64_t Offset) { return Unit; });
}
hasDIE = DebugInfoData.isValidOffset(Offset);
++UnitIdx;
@@ -386,14 +371,6 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S,
}
if (!isHeaderChainValid)
++NumDebugInfoErrors;
- NumDebugInfoErrors += verifyDebugInfoReferences(
- CrossUnitReferences, [&](uint64_t Offset) -> DWARFUnit * {
- if (DWARFUnit *U = TypeUnitVector.getUnitForOffset(Offset))
- return U;
- if (DWARFUnit *U = CompileUnitVector.getUnitForOffset(Offset))
- return U;
- return nullptr;
- });
return NumDebugInfoErrors;
}
@@ -403,13 +380,16 @@ bool DWARFVerifier::handleDebugInfo() {
OS << "Verifying .debug_info Unit Header Chain...\n";
DObj.forEachInfoSections([&](const DWARFSection &S) {
- NumErrors += verifyUnitSection(S, DW_SECT_INFO);
+ NumErrors += verifyUnitSection(S);
});
OS << "Verifying .debug_types Unit Header Chain...\n";
DObj.forEachTypesSections([&](const DWARFSection &S) {
- NumErrors += verifyUnitSection(S, DW_SECT_EXT_TYPES);
+ NumErrors += verifyUnitSection(S);
});
+
+ OS << "Verifying non-dwo Units...\n";
+ NumErrors += verifyUnits(DCtx.getNormalUnitsVector());
return NumErrors == 0;
}
diff --git a/contrib/llvm-project/llvm/lib/Demangle/DLangDemangle.cpp b/contrib/llvm-project/llvm/lib/Demangle/DLangDemangle.cpp
index d2f1bf4323ee..f380aa90035e 100644
--- a/contrib/llvm-project/llvm/lib/Demangle/DLangDemangle.cpp
+++ b/contrib/llvm-project/llvm/lib/Demangle/DLangDemangle.cpp
@@ -14,12 +14,250 @@
//===----------------------------------------------------------------------===//
#include "llvm/Demangle/Demangle.h"
+#include "llvm/Demangle/StringView.h"
#include "llvm/Demangle/Utility.h"
+#include <cctype>
#include <cstring>
+#include <limits>
using namespace llvm;
using llvm::itanium_demangle::OutputBuffer;
+using llvm::itanium_demangle::StringView;
+
+namespace {
+
+/// Demangle information structure.
+struct Demangler {
+ /// Initialize the information structure we use to pass around information.
+ ///
+ /// \param Mangled String to demangle.
+ Demangler(const char *Mangled);
+
+ /// Extract and demangle the mangled symbol and append it to the output
+ /// string.
+ ///
+ /// \param Demangled Output buffer to write the demangled name.
+ ///
+ /// \return The remaining string on success or nullptr on failure.
+ ///
+ /// \see https://dlang.org/spec/abi.html#name_mangling .
+ /// \see https://dlang.org/spec/abi.html#MangledName .
+ const char *parseMangle(OutputBuffer *Demangled);
+
+private:
+ /// Extract and demangle a given mangled symbol and append it to the output
+ /// string.
+ ///
+ /// \param Demangled output buffer to write the demangled name.
+ /// \param Mangled mangled symbol to be demangled.
+ ///
+ /// \return The remaining string on success or nullptr on failure.
+ ///
+ /// \see https://dlang.org/spec/abi.html#name_mangling .
+ /// \see https://dlang.org/spec/abi.html#MangledName .
+ const char *parseMangle(OutputBuffer *Demangled, const char *Mangled);
+
+ /// Extract the number from a given string.
+ ///
+ /// \param Mangled string to extract the number.
+ /// \param Ret assigned result value.
+ ///
+ /// \return The remaining string on success or nullptr on failure.
+ ///
+ /// \note A result larger than UINT_MAX is considered a failure.
+ ///
+ /// \see https://dlang.org/spec/abi.html#Number .
+ const char *decodeNumber(const char *Mangled, unsigned long *Ret);
+
+ /// Check whether it is the beginning of a symbol name.
+ ///
+ /// \param Mangled string to extract the symbol name.
+ ///
+ /// \return true on success, false otherwise.
+ ///
+ /// \see https://dlang.org/spec/abi.html#SymbolName .
+ bool isSymbolName(const char *Mangled);
+
+/// Extract and demangle an identifier from a given mangled symbol and append
+/// it to the output string.
+ ///
+ /// \param Demangled Output buffer to write the demangled name.
+ /// \param Mangled Mangled symbol to be demangled.
+ ///
+ /// \return The remaining string on success or nullptr on failure.
+ ///
+ /// \see https://dlang.org/spec/abi.html#SymbolName .
+ const char *parseIdentifier(OutputBuffer *Demangled, const char *Mangled);
+
+ /// Extract and demangle the plain identifier from a given mangled symbol and
+ /// prepend/append it to the output string, with a special treatment for some
+ /// magic compiler generated symbols.
+ ///
+ /// \param Demangled Output buffer to write the demangled name.
+ /// \param Mangled Mangled symbol to be demangled.
+ /// \param Len Length of the mangled symbol name.
+ ///
+ /// \return The remaining string on success or nullptr on failure.
+ ///
+ /// \see https://dlang.org/spec/abi.html#LName .
+ const char *parseLName(OutputBuffer *Demangled, const char *Mangled,
+ unsigned long Len);
+
+/// Extract and demangle the qualified symbol from a given mangled symbol and
+/// append it to the output string.
+ ///
+ /// \param Demangled Output buffer to write the demangled name.
+ /// \param Mangled Mangled symbol to be demangled.
+ ///
+ /// \return The remaining string on success or nullptr on failure.
+ ///
+ /// \see https://dlang.org/spec/abi.html#QualifiedName .
+ const char *parseQualified(OutputBuffer *Demangled, const char *Mangled);
+
+ /// The string we are demangling.
+ const char *Str;
+};
+
+} // namespace
+
+const char *Demangler::decodeNumber(const char *Mangled, unsigned long *Ret) {
+ // Return nullptr if trying to extract something that isn't a digit.
+ if (Mangled == nullptr || !std::isdigit(*Mangled))
+ return nullptr;
+
+ unsigned long Val = 0;
+
+ do {
+ unsigned long Digit = Mangled[0] - '0';
+
+ // Check for overflow.
+ if (Val > (std::numeric_limits<unsigned int>::max() - Digit) / 10)
+ return nullptr;
+
+ Val = Val * 10 + Digit;
+ ++Mangled;
+ } while (std::isdigit(*Mangled));
+
+ if (*Mangled == '\0')
+ return nullptr;
+
+ *Ret = Val;
+ return Mangled;
+}
+
+bool Demangler::isSymbolName(const char *Mangled) {
+ if (std::isdigit(*Mangled))
+ return true;
+
+ // TODO: Handle symbol back references and template instances.
+ return false;
+}
+
+const char *Demangler::parseMangle(OutputBuffer *Demangled,
+ const char *Mangled) {
+ // A D mangled symbol is comprised of both scope and type information.
+ // MangleName:
+ // _D QualifiedName Type
+ // _D QualifiedName Z
+ // ^
+ // The caller should have guaranteed that the start pointer is at the
+ // above location.
+ // Note that type is never a function type, but only the return type of
+ // a function or the type of a variable.
+ Mangled += 2;
+
+ Mangled = parseQualified(Demangled, Mangled);
+
+ if (Mangled != nullptr) {
+ // Artificial symbols end with 'Z' and have no type.
+ if (*Mangled == 'Z')
+ ++Mangled;
+ else {
+ // TODO: Implement symbols with types.
+ return nullptr;
+ }
+ }
+
+ return Mangled;
+}
+
+const char *Demangler::parseQualified(OutputBuffer *Demangled,
+ const char *Mangled) {
+ // Qualified names are identifiers separated by their encoded length.
+ // Nested functions also encode their argument types without specifying
+ // what they return.
+ // QualifiedName:
+ // SymbolFunctionName
+ // SymbolFunctionName QualifiedName
+ // ^
+ // SymbolFunctionName:
+ // SymbolName
+ // SymbolName TypeFunctionNoReturn
+ // SymbolName M TypeFunctionNoReturn
+ // SymbolName M TypeModifiers TypeFunctionNoReturn
+ // The start pointer should be at the above location.
+
+ // Whether it has more than one symbol
+ size_t NotFirst = false;
+ do {
+ // Skip over anonymous symbols.
+ if (*Mangled == '0') {
+ do
+ ++Mangled;
+ while (*Mangled == '0');
+
+ continue;
+ }
+
+ if (NotFirst)
+ *Demangled << '.';
+ NotFirst = true;
+
+ Mangled = parseIdentifier(Demangled, Mangled);
+
+ } while (Mangled && isSymbolName(Mangled));
+
+ return Mangled;
+}
+
+const char *Demangler::parseIdentifier(OutputBuffer *Demangled,
+ const char *Mangled) {
+ unsigned long Len;
+
+ if (Mangled == nullptr || *Mangled == '\0')
+ return nullptr;
+
+ // TODO: Parse back references and lengthless template instances.
+
+ const char *Endptr = decodeNumber(Mangled, &Len);
+
+ if (Endptr == nullptr || Len == 0)
+ return nullptr;
+
+ if (strlen(Endptr) < Len)
+ return nullptr;
+
+ Mangled = Endptr;
+
+ // TODO: Parse template instances with a length prefix.
+
+ return parseLName(Demangled, Mangled, Len);
+}
+
+const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled,
+ unsigned long Len) {
+ *Demangled << StringView(Mangled, Len);
+ Mangled += Len;
+
+ return Mangled;
+}
+
+Demangler::Demangler(const char *Mangled) : Str(Mangled) {}
+
+const char *Demangler::parseMangle(OutputBuffer *Demangled) {
+ return parseMangle(Demangled, this->Str);
+}
char *llvm::dlangDemangle(const char *MangledName) {
if (MangledName == nullptr || strncmp(MangledName, "_D", 2) != 0)
@@ -29,8 +267,19 @@ char *llvm::dlangDemangle(const char *MangledName) {
if (!initializeOutputBuffer(nullptr, nullptr, Demangled, 1024))
return nullptr;
- if (strcmp(MangledName, "_Dmain") == 0)
+ if (strcmp(MangledName, "_Dmain") == 0) {
Demangled << "D main";
+ } else {
+
+ Demangler D = Demangler(MangledName);
+ MangledName = D.parseMangle(&Demangled);
+
+ // Check that the entire symbol was successfully demangled.
+ if (MangledName == nullptr || *MangledName != '\0') {
+ std::free(Demangled.getBuffer());
+ return nullptr;
+ }
+ }
// OutputBuffer's internal buffer is not null terminated and therefore we need
// to add it to comply with C null terminated strings.
@@ -40,6 +289,6 @@ char *llvm::dlangDemangle(const char *MangledName) {
return Demangled.getBuffer();
}
- free(Demangled.getBuffer());
+ std::free(Demangled.getBuffer());
return nullptr;
}
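
As a quick, hedged illustration of the length-prefixed QualifiedName handling added above, a caller of the public entry point might look like the sketch below. The mangled string is a made-up artificial symbol (trailing 'Z', no type), since typed symbols are still rejected by this version of the parser.

#include "llvm/Demangle/Demangle.h"
#include <cstdio>
#include <cstdlib>

int main() {
  // "_D" prefix, two length-prefixed identifiers ("3foo", "3bar"), and a
  // trailing 'Z' marking an artificial symbol without a type.
  if (char *Result = llvm::dlangDemangle("_D3foo3barZ")) {
    std::printf("%s\n", Result); // Expected to print "foo.bar".
    std::free(Result);           // Buffer is heap-allocated by the demangler.
  }
  return 0;
}
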
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/ExecutionEngine.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
index fe3c433bd2c5..a14bd4d2c3fd 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -1256,8 +1256,7 @@ void ExecutionEngine::emitGlobals() {
// If there are multiple modules, map the non-canonical globals to their
// canonical location.
if (!NonCanonicalGlobals.empty()) {
- for (unsigned i = 0, e = NonCanonicalGlobals.size(); i != e; ++i) {
- const GlobalValue *GV = NonCanonicalGlobals[i];
+ for (const GlobalValue *GV : NonCanonicalGlobals) {
const GlobalValue *CGV = LinkedGlobalsMap[std::make_pair(
std::string(GV->getName()), GV->getType())];
void *Ptr = getPointerToGlobalIfAvailable(CGV);
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h
index fdc987751286..f9101d71dfa8 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h
@@ -143,6 +143,9 @@ protected:
// Only SHF_ALLOC sections will have graph sections.
DenseMap<ELFSectionIndex, Section *> GraphSections;
DenseMap<ELFSymbolIndex, Symbol *> GraphSymbols;
+ DenseMap<const typename ELFFile::Elf_Shdr *,
+ ArrayRef<typename ELFFile::Elf_Word>>
+ ShndxTables;
};
template <typename ELFT>
@@ -241,7 +244,7 @@ template <typename ELFT> Error ELFLinkGraphBuilder<ELFT>::prepare() {
return SectionStringTabOrErr.takeError();
// Get the SHT_SYMTAB section.
- for (auto &Sec : Sections)
+ for (auto &Sec : Sections) {
if (Sec.sh_type == ELF::SHT_SYMTAB) {
if (!SymTabSec)
SymTabSec = &Sec;
@@ -250,6 +253,20 @@ template <typename ELFT> Error ELFLinkGraphBuilder<ELFT>::prepare() {
G->getName());
}
+ // Extended table.
+ if (Sec.sh_type == ELF::SHT_SYMTAB_SHNDX) {
+ uint32_t SymtabNdx = Sec.sh_link;
+ if (SymtabNdx >= Sections.size())
+ return make_error<JITLinkError>("sh_link is out of bounds");
+
+ auto ShndxTable = Obj.getSHNDXTable(Sec);
+ if (!ShndxTable)
+ return ShndxTable.takeError();
+
+ ShndxTables.insert({&Sections[SymtabNdx], *ShndxTable});
+ }
+ }
+
return Error::success();
}
@@ -299,11 +316,6 @@ template <typename ELFT> Error ELFLinkGraphBuilder<ELFT>::graphifySections() {
else
Prot = MemProt::Read | MemProt::Write;
- // For now we just use this to skip the "undefined" section, probably need
- // to revist.
- if (Sec.sh_size == 0)
- continue;
-
auto &GraphSec = G->createSection(*Name, Prot);
if (Sec.sh_type != ELF::SHT_NOBITS) {
auto Data = Obj.template getSectionContentsAsArray<char>(Sec);
@@ -401,9 +413,19 @@ template <typename ELFT> Error ELFLinkGraphBuilder<ELFT>::graphifySymbols() {
(Sym.getType() == ELF::STT_NOTYPE || Sym.getType() == ELF::STT_FUNC ||
Sym.getType() == ELF::STT_OBJECT ||
Sym.getType() == ELF::STT_SECTION || Sym.getType() == ELF::STT_TLS)) {
-
- // FIXME: Handle extended tables.
- if (auto *GraphSec = getGraphSection(Sym.st_shndx)) {
+ // Handle extended tables.
+ unsigned Shndx = Sym.st_shndx;
+ if (Shndx == ELF::SHN_XINDEX) {
+ auto ShndxTable = ShndxTables.find(SymTabSec);
+ if (ShndxTable == ShndxTables.end())
+ continue;
+ auto NdxOrErr = object::getExtendedSymbolTableIndex<ELFT>(
+ Sym, SymIndex, ShndxTable->second);
+ if (!NdxOrErr)
+ return NdxOrErr.takeError();
+ Shndx = *NdxOrErr;
+ }
+ if (auto *GraphSec = getGraphSection(Shndx)) {
Block *B = nullptr;
{
auto Blocks = GraphSec->blocks();
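
For context, the SHN_XINDEX path above only matters for objects whose section count does not fit in the 16-bit st_shndx field. A condensed, hedged sketch of the lookup (the helper below is illustrative, not part of JITLink; names mirror the surrounding code):

#include "llvm/Object/ELF.h"
#include "llvm/Support/Error.h"

// Illustrative only: resolve a symbol's section index, consulting the
// SHT_SYMTAB_SHNDX table when st_shndx holds the escape value SHN_XINDEX.
template <typename ELFT>
llvm::Expected<uint32_t>
resolveSectionIndex(const typename ELFT::Sym &Sym, unsigned SymIndex,
                    llvm::ArrayRef<typename ELFT::Word> ShndxTable) {
  if (Sym.st_shndx != llvm::ELF::SHN_XINDEX)
    return Sym.st_shndx; // Ordinary index; fits in the 16-bit field.
  // The extended-index table holds one 32-bit entry per symbol.
  return llvm::object::getExtendedSymbolTableIndex<ELFT>(Sym, SymIndex,
                                                         ShndxTable);
}
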
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Core.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Core.cpp
index 6b24d6461b63..56a97f83d915 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Core.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/Core.cpp
@@ -612,9 +612,14 @@ void LookupState::continueLookup(Error Err) {
DefinitionGenerator::~DefinitionGenerator() {}
+JITDylib::~JITDylib() {
+ LLVM_DEBUG(dbgs() << "Destroying JITDylib " << getName() << "\n");
+}
+
Error JITDylib::clear() {
std::vector<ResourceTrackerSP> TrackersToRemove;
ES.runSessionLocked([&]() {
+ assert(State != Closed && "JD is defunct");
for (auto &KV : TrackerSymbols)
TrackersToRemove.push_back(KV.first);
TrackersToRemove.push_back(getDefaultResourceTracker());
@@ -628,6 +633,7 @@ Error JITDylib::clear() {
ResourceTrackerSP JITDylib::getDefaultResourceTracker() {
return ES.runSessionLocked([this] {
+ assert(State != Closed && "JD is defunct");
if (!DefaultTracker)
DefaultTracker = new ResourceTracker(this);
return DefaultTracker;
@@ -636,19 +642,22 @@ ResourceTrackerSP JITDylib::getDefaultResourceTracker() {
ResourceTrackerSP JITDylib::createResourceTracker() {
return ES.runSessionLocked([this] {
+ assert(State == Open && "JD is defunct");
ResourceTrackerSP RT = new ResourceTracker(this);
return RT;
});
}
void JITDylib::removeGenerator(DefinitionGenerator &G) {
- std::lock_guard<std::mutex> Lock(GeneratorsMutex);
- auto I = llvm::find_if(DefGenerators,
- [&](const std::shared_ptr<DefinitionGenerator> &H) {
- return H.get() == &G;
- });
- assert(I != DefGenerators.end() && "Generator not found");
- DefGenerators.erase(I);
+ ES.runSessionLocked([&] {
+ assert(State == Open && "JD is defunct");
+ auto I = llvm::find_if(DefGenerators,
+ [&](const std::shared_ptr<DefinitionGenerator> &H) {
+ return H.get() == &G;
+ });
+ assert(I != DefGenerators.end() && "Generator not found");
+ DefGenerators.erase(I);
+ });
}
Expected<SymbolFlagsMap>
@@ -708,10 +717,8 @@ Error JITDylib::replace(MaterializationResponsibility &FromMR,
auto Err =
ES.runSessionLocked([&, this]() -> Error {
- auto RT = getTracker(FromMR);
-
- if (RT->isDefunct())
- return make_error<ResourceTrackerDefunct>(std::move(RT));
+ if (FromMR.RT->isDefunct())
+ return make_error<ResourceTrackerDefunct>(std::move(FromMR.RT));
#ifndef NDEBUG
for (auto &KV : MU->getSymbols()) {
@@ -735,7 +742,8 @@ Error JITDylib::replace(MaterializationResponsibility &FromMR,
if (MII != MaterializingInfos.end()) {
if (MII->second.hasQueriesPending()) {
MustRunMR = ES.createMaterializationResponsibility(
- *RT, std::move(MU->SymbolFlags), std::move(MU->InitSymbol));
+ *FromMR.RT, std::move(MU->SymbolFlags),
+ std::move(MU->InitSymbol));
MustRunMU = std::move(MU);
return Error::success();
}
@@ -743,10 +751,8 @@ Error JITDylib::replace(MaterializationResponsibility &FromMR,
}
// Otherwise, make MU responsible for all the symbols.
- auto RTI = MRTrackers.find(&FromMR);
- assert(RTI != MRTrackers.end() && "No tracker for FromMR");
- auto UMI =
- std::make_shared<UnmaterializedInfo>(std::move(MU), RTI->second);
+ auto UMI = std::make_shared<UnmaterializedInfo>(std::move(MU),
+ FromMR.RT.get());
for (auto &KV : UMI->MU->getSymbols()) {
auto SymI = Symbols.find(KV.first);
assert(SymI->second.getState() == SymbolState::Materializing &&
@@ -787,13 +793,11 @@ JITDylib::delegate(MaterializationResponsibility &FromMR,
return ES.runSessionLocked(
[&]() -> Expected<std::unique_ptr<MaterializationResponsibility>> {
- auto RT = getTracker(FromMR);
-
- if (RT->isDefunct())
- return make_error<ResourceTrackerDefunct>(std::move(RT));
+ if (FromMR.RT->isDefunct())
+ return make_error<ResourceTrackerDefunct>(std::move(FromMR.RT));
return ES.createMaterializationResponsibility(
- *RT, std::move(SymbolFlags), std::move(InitSymbol));
+ *FromMR.RT, std::move(SymbolFlags), std::move(InitSymbol));
});
}
@@ -903,10 +907,13 @@ Error JITDylib::resolve(MaterializationResponsibility &MR,
AsynchronousSymbolQuerySet CompletedQueries;
if (auto Err = ES.runSessionLocked([&, this]() -> Error {
- auto RTI = MRTrackers.find(&MR);
- assert(RTI != MRTrackers.end() && "No resource tracker for MR?");
- if (RTI->second->isDefunct())
- return make_error<ResourceTrackerDefunct>(RTI->second);
+ if (MR.RT->isDefunct())
+ return make_error<ResourceTrackerDefunct>(MR.RT);
+
+ if (State != Open)
+ return make_error<StringError>("JITDylib " + getName() +
+ " is defunct",
+ inconvertibleErrorCode());
struct WorklistEntry {
SymbolTable::iterator SymI;
@@ -1001,10 +1008,13 @@ Error JITDylib::emit(MaterializationResponsibility &MR,
DenseMap<JITDylib *, SymbolNameVector> ReadySymbols;
if (auto Err = ES.runSessionLocked([&, this]() -> Error {
- auto RTI = MRTrackers.find(&MR);
- assert(RTI != MRTrackers.end() && "No resource tracker for MR?");
- if (RTI->second->isDefunct())
- return make_error<ResourceTrackerDefunct>(RTI->second);
+ if (MR.RT->isDefunct())
+ return make_error<ResourceTrackerDefunct>(MR.RT);
+
+ if (State != Open)
+ return make_error<StringError>("JITDylib " + getName() +
+ " is defunct",
+ inconvertibleErrorCode());
SymbolNameSet SymbolsInErrorState;
std::vector<SymbolTable::iterator> Worklist;
@@ -1149,9 +1159,12 @@ Error JITDylib::emit(MaterializationResponsibility &MR,
void JITDylib::unlinkMaterializationResponsibility(
MaterializationResponsibility &MR) {
ES.runSessionLocked([&]() {
- auto I = MRTrackers.find(&MR);
- assert(I != MRTrackers.end() && "MaterializationResponsibility not linked");
- MRTrackers.erase(I);
+ auto I = TrackerMRs.find(MR.RT.get());
+ assert(I != TrackerMRs.end() && "No MRs in TrackerMRs list for RT");
+ assert(I->second.count(&MR) && "MR not in TrackerMRs list for RT");
+ I->second.erase(&MR);
+ if (I->second.empty())
+ TrackerMRs.erase(MR.RT.get());
});
}
@@ -1169,8 +1182,16 @@ JITDylib::failSymbols(FailedSymbolsWorklist Worklist) {
(*FailedSymbolsMap)[&JD].insert(Name);
- assert(JD.Symbols.count(Name) && "No symbol table entry for Name");
- auto &Sym = JD.Symbols[Name];
+ // Look up the symbol to fail.
+ auto SymI = JD.Symbols.find(Name);
+
+ // It's possible that this symbol has already been removed, e.g. if a
+ // materialization failure happens concurrently with a ResourceTracker or
+ // JITDylib removal. In that case we can safely skip this symbol and
+ // continue.
+ if (SymI == JD.Symbols.end())
+ continue;
+ auto &Sym = SymI->second;
// Move the symbol into the error state.
// Note that this may be redundant: The symbol might already have been
@@ -1267,6 +1288,7 @@ JITDylib::failSymbols(FailedSymbolsWorklist Worklist) {
void JITDylib::setLinkOrder(JITDylibSearchOrder NewLinkOrder,
bool LinkAgainstThisJITDylibFirst) {
ES.runSessionLocked([&]() {
+ assert(State == Open && "JD is defunct");
if (LinkAgainstThisJITDylibFirst) {
LinkOrder.clear();
if (NewLinkOrder.empty() || NewLinkOrder.front().first != this)
@@ -1285,6 +1307,7 @@ void JITDylib::addToLinkOrder(JITDylib &JD, JITDylibLookupFlags JDLookupFlags) {
void JITDylib::replaceInLinkOrder(JITDylib &OldJD, JITDylib &NewJD,
JITDylibLookupFlags JDLookupFlags) {
ES.runSessionLocked([&]() {
+ assert(State == Open && "JD is defunct");
for (auto &KV : LinkOrder)
if (KV.first == &OldJD) {
KV = {&NewJD, JDLookupFlags};
@@ -1295,6 +1318,7 @@ void JITDylib::replaceInLinkOrder(JITDylib &OldJD, JITDylib &NewJD,
void JITDylib::removeFromLinkOrder(JITDylib &JD) {
ES.runSessionLocked([&]() {
+ assert(State == Open && "JD is defunct");
auto I = llvm::find_if(LinkOrder,
[&](const JITDylibSearchOrder::value_type &KV) {
return KV.first == &JD;
@@ -1306,6 +1330,7 @@ void JITDylib::removeFromLinkOrder(JITDylib &JD) {
Error JITDylib::remove(const SymbolNameSet &Names) {
return ES.runSessionLocked([&]() -> Error {
+ assert(State == Open && "JD is defunct");
using SymbolMaterializerItrPair =
std::pair<SymbolTable::iterator, UnmaterializedInfosMap::iterator>;
std::vector<SymbolMaterializerItrPair> SymbolsToRemove;
@@ -1365,8 +1390,23 @@ Error JITDylib::remove(const SymbolNameSet &Names) {
void JITDylib::dump(raw_ostream &OS) {
ES.runSessionLocked([&, this]() {
OS << "JITDylib \"" << getName() << "\" (ES: "
- << format("0x%016" PRIx64, reinterpret_cast<uintptr_t>(&ES)) << "):\n"
- << "Link order: " << LinkOrder << "\n"
+ << format("0x%016" PRIx64, reinterpret_cast<uintptr_t>(&ES))
+ << ", State = ";
+ switch (State) {
+ case Open:
+ OS << "Open";
+ break;
+ case Closing:
+ OS << "Closing";
+ break;
+ case Closed:
+ OS << "Closed";
+ break;
+ }
+ OS << ")\n";
+ if (State == Closed)
+ return;
+ OS << "Link order: " << LinkOrder << "\n"
<< "Symbol table:\n";
for (auto &KV : Symbols) {
@@ -1454,17 +1494,11 @@ JITDylib::JITDylib(ExecutionSession &ES, std::string Name)
LinkOrder.push_back({this, JITDylibLookupFlags::MatchAllSymbols});
}
-ResourceTrackerSP JITDylib::getTracker(MaterializationResponsibility &MR) {
- auto I = MRTrackers.find(&MR);
- assert(I != MRTrackers.end() && "MR is not linked");
- assert(I->second && "Linked tracker is null");
- return I->second;
-}
-
std::pair<JITDylib::AsynchronousSymbolQuerySet,
std::shared_ptr<SymbolDependenceMap>>
JITDylib::removeTracker(ResourceTracker &RT) {
// Note: Should be called under the session lock.
+ assert(State != Closed && "JD is defunct");
SymbolNameVector SymbolsToRemove;
std::vector<std::pair<JITDylib *, SymbolStringPtr>> SymbolsToFail;
@@ -1525,6 +1559,7 @@ JITDylib::removeTracker(ResourceTracker &RT) {
}
void JITDylib::transferTracker(ResourceTracker &DstRT, ResourceTracker &SrcRT) {
+ assert(State != Closed && "JD is defunct");
assert(&DstRT != &SrcRT && "No-op transfers shouldn't call transferTracker");
assert(&DstRT.getJITDylib() == this && "DstRT is not for this JITDylib");
assert(&SrcRT.getJITDylib() == this && "SrcRT is not for this JITDylib");
@@ -1536,9 +1571,22 @@ void JITDylib::transferTracker(ResourceTracker &DstRT, ResourceTracker &SrcRT) {
}
// Update trackers for any active materialization responsibilities.
- for (auto &KV : MRTrackers) {
- if (KV.second == &SrcRT)
- KV.second = &DstRT;
+ {
+ auto I = TrackerMRs.find(&SrcRT);
+ if (I != TrackerMRs.end()) {
+ auto &SrcMRs = I->second;
+ auto &DstMRs = TrackerMRs[&DstRT];
+ for (auto *MR : SrcMRs)
+ MR->RT = &DstRT;
+ if (DstMRs.empty())
+ DstMRs = std::move(SrcMRs);
+ else
+ for (auto *MR : SrcMRs)
+ DstMRs.insert(MR);
+ // Erase SrcRT entry in TrackerMRs. Use &SrcRT key rather than iterator I
+ // for this, since I may have been invalidated by 'TrackerMRs[&DstRT]'.
+ TrackerMRs.erase(&SrcRT);
+ }
}
// If we're transfering to the default tracker we just need to delete the
@@ -1872,6 +1920,40 @@ Expected<JITDylib &> ExecutionSession::createJITDylib(std::string Name) {
return JD;
}
+Error ExecutionSession::removeJITDylib(JITDylib &JD) {
+ // Keep JD alive throughout this routine, even if all other references
+ // have been dropped.
+ JITDylibSP JDKeepAlive = &JD;
+
+ // Set JD to 'Closing' state and remove JD from the ExecutionSession.
+ runSessionLocked([&] {
+ assert(JD.State == JITDylib::Open && "JD already closed");
+ JD.State = JITDylib::Closing;
+ auto I = llvm::find(JDs, &JD);
+ assert(I != JDs.end() && "JD does not appear in session JDs");
+ JDs.erase(I);
+ });
+
+ // Clear the JITDylib.
+ auto Err = JD.clear();
+
+ // Set JD to closed state. Clear remaining data structures.
+ runSessionLocked([&] {
+ assert(JD.State == JITDylib::Closing && "JD should be closing");
+ JD.State = JITDylib::Closed;
+ assert(JD.Symbols.empty() && "JD.Symbols is not empty after clear");
+ assert(JD.UnmaterializedInfos.empty() &&
+ "JD.UnmaterializedInfos is not empty after clear");
+ assert(JD.MaterializingInfos.empty() &&
+ "JD.MaterializingInfos is not empty after clear");
+ assert(JD.TrackerSymbols.empty() &&
+ "TrackerSymbols is not empty after clear");
+ JD.DefGenerators.clear();
+ JD.LinkOrder.clear();
+ });
+ return Err;
+}
+
std::vector<JITDylibSP> JITDylib::getDFSLinkOrder(ArrayRef<JITDylibSP> JDs) {
if (JDs.empty())
return {};
@@ -1883,6 +1965,8 @@ std::vector<JITDylibSP> JITDylib::getDFSLinkOrder(ArrayRef<JITDylibSP> JDs) {
for (auto &JD : JDs) {
+ assert(JD->State == Open && "JD is defunct");
+
if (Visited.count(JD.get()))
continue;
@@ -2311,8 +2395,11 @@ void ExecutionSession::OL_applyQueryPhase1(
});
// Build the definition generator stack for this JITDylib.
- for (auto &DG : reverse(JD.DefGenerators))
- IPLS->CurDefGeneratorStack.push_back(DG);
+ runSessionLocked([&] {
+ IPLS->CurDefGeneratorStack.reserve(JD.DefGenerators.size());
+ for (auto &DG : reverse(JD.DefGenerators))
+ IPLS->CurDefGeneratorStack.push_back(DG);
+ });
// Flag that we've done our initialization.
IPLS->NewJITDylib = false;
@@ -2629,17 +2716,15 @@ void ExecutionSession::OL_completeLookup(
LLVM_DEBUG(dbgs() << "Adding MUs to dispatch:\n");
for (auto &KV : CollectedUMIs) {
- auto &JD = *KV.first;
LLVM_DEBUG({
+ auto &JD = *KV.first;
dbgs() << " For " << JD.getName() << ": Adding " << KV.second.size()
<< " MUs.\n";
});
for (auto &UMI : KV.second) {
- std::unique_ptr<MaterializationResponsibility> MR(
- new MaterializationResponsibility(
- &JD, std::move(UMI->MU->SymbolFlags),
- std::move(UMI->MU->InitSymbol)));
- JD.MRTrackers[MR.get()] = UMI->RT;
+ auto MR = createMaterializationResponsibility(
+ *UMI->RT, std::move(UMI->MU->SymbolFlags),
+ std::move(UMI->MU->InitSymbol));
OutstandingMUs.push_back(
std::make_pair(std::move(UMI->MU), std::move(MR)));
}
@@ -2757,18 +2842,18 @@ void ExecutionSession::OL_destroyMaterializationResponsibility(
assert(MR.SymbolFlags.empty() &&
"All symbols should have been explicitly materialized or failed");
- MR.JD->unlinkMaterializationResponsibility(MR);
+ MR.JD.unlinkMaterializationResponsibility(MR);
}
SymbolNameSet ExecutionSession::OL_getRequestedSymbols(
const MaterializationResponsibility &MR) {
- return MR.JD->getRequestedSymbols(MR.SymbolFlags);
+ return MR.JD.getRequestedSymbols(MR.SymbolFlags);
}
Error ExecutionSession::OL_notifyResolved(MaterializationResponsibility &MR,
const SymbolMap &Symbols) {
LLVM_DEBUG({
- dbgs() << "In " << MR.JD->getName() << " resolving " << Symbols << "\n";
+ dbgs() << "In " << MR.JD.getName() << " resolving " << Symbols << "\n";
});
#ifndef NDEBUG
for (auto &KV : Symbols) {
@@ -2783,15 +2868,16 @@ Error ExecutionSession::OL_notifyResolved(MaterializationResponsibility &MR,
}
#endif
- return MR.JD->resolve(MR, Symbols);
+ return MR.JD.resolve(MR, Symbols);
}
Error ExecutionSession::OL_notifyEmitted(MaterializationResponsibility &MR) {
LLVM_DEBUG({
- dbgs() << "In " << MR.JD->getName() << " emitting " << MR.SymbolFlags << "\n";
+ dbgs() << "In " << MR.JD.getName() << " emitting " << MR.SymbolFlags
+ << "\n";
});
- if (auto Err = MR.JD->emit(MR, MR.SymbolFlags))
+ if (auto Err = MR.JD.emit(MR, MR.SymbolFlags))
return Err;
MR.SymbolFlags.clear();
@@ -2802,10 +2888,11 @@ Error ExecutionSession::OL_defineMaterializing(
MaterializationResponsibility &MR, SymbolFlagsMap NewSymbolFlags) {
LLVM_DEBUG({
- dbgs() << "In " << MR.JD->getName() << " defining materializing symbols "
+ dbgs() << "In " << MR.JD.getName() << " defining materializing symbols "
<< NewSymbolFlags << "\n";
});
- if (auto AcceptedDefs = MR.JD->defineMaterializing(std::move(NewSymbolFlags))) {
+ if (auto AcceptedDefs =
+ MR.JD.defineMaterializing(std::move(NewSymbolFlags))) {
// Add all newly accepted symbols to this responsibility object.
for (auto &KV : *AcceptedDefs)
MR.SymbolFlags.insert(KV);
@@ -2817,14 +2904,14 @@ Error ExecutionSession::OL_defineMaterializing(
void ExecutionSession::OL_notifyFailed(MaterializationResponsibility &MR) {
LLVM_DEBUG({
- dbgs() << "In " << MR.JD->getName() << " failing materialization for "
+ dbgs() << "In " << MR.JD.getName() << " failing materialization for "
<< MR.SymbolFlags << "\n";
});
JITDylib::FailedSymbolsWorklist Worklist;
for (auto &KV : MR.SymbolFlags)
- Worklist.push_back(std::make_pair(MR.JD.get(), KV.first));
+ Worklist.push_back(std::make_pair(&MR.JD, KV.first));
MR.SymbolFlags.clear();
if (Worklist.empty())
@@ -2834,9 +2921,8 @@ void ExecutionSession::OL_notifyFailed(MaterializationResponsibility &MR) {
std::shared_ptr<SymbolDependenceMap> FailedSymbols;
runSessionLocked([&]() {
- auto RTI = MR.JD->MRTrackers.find(&MR);
- assert(RTI != MR.JD->MRTrackers.end() && "No tracker for this");
- if (RTI->second->isDefunct())
+ // If the tracker is defunct then there's nothing to do here.
+ if (MR.RT->isDefunct())
return;
std::tie(FailedQueries, FailedSymbols) =
@@ -2858,12 +2944,12 @@ Error ExecutionSession::OL_replace(MaterializationResponsibility &MR,
if (MU->getInitializerSymbol() == MR.InitSymbol)
MR.InitSymbol = nullptr;
- LLVM_DEBUG(MR.JD->getExecutionSession().runSessionLocked([&]() {
- dbgs() << "In " << MR.JD->getName() << " replacing symbols with " << *MU
+ LLVM_DEBUG(MR.JD.getExecutionSession().runSessionLocked([&]() {
+ dbgs() << "In " << MR.JD.getName() << " replacing symbols with " << *MU
<< "\n";
}););
- return MR.JD->replace(MR, std::move(MU));
+ return MR.JD.replace(MR, std::move(MU));
}
Expected<std::unique_ptr<MaterializationResponsibility>>
@@ -2886,8 +2972,8 @@ ExecutionSession::OL_delegate(MaterializationResponsibility &MR,
MR.SymbolFlags.erase(I);
}
- return MR.JD->delegate(MR, std::move(DelegatedFlags),
- std::move(DelegatedInitSymbol));
+ return MR.JD.delegate(MR, std::move(DelegatedFlags),
+ std::move(DelegatedInitSymbol));
}
void ExecutionSession::OL_addDependencies(
@@ -2899,7 +2985,7 @@ void ExecutionSession::OL_addDependencies(
});
assert(MR.SymbolFlags.count(Name) &&
"Symbol not covered by this MaterializationResponsibility instance");
- MR.JD->addDependencies(Name, Dependencies);
+ MR.JD.addDependencies(Name, Dependencies);
}
void ExecutionSession::OL_addDependenciesForAll(
@@ -2910,7 +2996,7 @@ void ExecutionSession::OL_addDependenciesForAll(
<< Dependencies << "\n";
});
for (auto &KV : MR.SymbolFlags)
- MR.JD->addDependencies(KV.first, Dependencies);
+ MR.JD.addDependencies(KV.first, Dependencies);
}
#ifndef NDEBUG
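
The Open/Closing/Closed state machine introduced above exists so that a JITDylib can be torn down while the session stays alive. A hedged usage sketch against the new API (an ExecutionSession `ES` is assumed; error handling is abbreviated):

#include "llvm/ExecutionEngine/Orc/Core.h"

// Sketch only: create a JITDylib, use it, then remove it explicitly.
llvm::Error useTemporaryJITDylib(llvm::orc::ExecutionSession &ES) {
  auto JDOrErr = ES.createJITDylib("temporary");
  if (!JDOrErr)
    return JDOrErr.takeError();
  llvm::orc::JITDylib &JD = *JDOrErr;

  // ... define symbols in JD and run lookups against it ...

  // Moves JD through Closing to Closed, clears its symbol table and
  // trackers, and drops it from the session's JITDylib list.
  return ES.removeJITDylib(JD);
}
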
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index 1b7fdb588275..0de76ab78e0f 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -1301,7 +1301,7 @@ RuntimeDyldELF::processRelocationRef(
MemMgr.allowStubAllocation()) {
resolveAArch64Branch(SectionID, Value, RelI, Stubs);
} else if (RelType == ELF::R_AARCH64_ADR_GOT_PAGE) {
- // Craete new GOT entry or find existing one. If GOT entry is
+ // Create new GOT entry or find existing one. If GOT entry is
// to be created, then we also emit ABS64 relocation for it.
uint64_t GOTOffset = findOrAllocGOTEntry(Value, ELF::R_AARCH64_ABS64);
resolveGOTOffsetRelocation(SectionID, Offset, GOTOffset + Addend,
diff --git a/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index ce998df757ec..18f1a2314853 100644
--- a/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -993,6 +993,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections(
Value *ST = ConstantInt::get(I32Ty, 1);
llvm::CanonicalLoopInfo *LoopInfo = createCanonicalLoop(
Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop");
+ Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator());
+ AllocaIP = Builder.saveIP();
InsertPointTy AfterIP =
applyStaticWorkshareLoop(Loc.DL, LoopInfo, AllocaIP, true);
BasicBlock *LoopAfterBB = AfterIP.getBlock();
diff --git a/contrib/llvm-project/llvm/lib/IR/AsmWriter.cpp b/contrib/llvm-project/llvm/lib/IR/AsmWriter.cpp
index 7734c0a8de58..c9748e1387eb 100644
--- a/contrib/llvm-project/llvm/lib/IR/AsmWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/AsmWriter.cpp
@@ -353,12 +353,11 @@ void llvm::printLLVMNameWithoutPrefix(raw_ostream &OS, StringRef Name) {
// Scan the name to see if it needs quotes first.
bool NeedsQuotes = isdigit(static_cast<unsigned char>(Name[0]));
if (!NeedsQuotes) {
- for (unsigned i = 0, e = Name.size(); i != e; ++i) {
+ for (unsigned char C : Name) {
// By making this unsigned, the value passed in to isalnum will always be
// in the range 0-255. This is important when building with MSVC because
// its implementation will assert. This situation can arise when dealing
// with UTF-8 multibyte characters.
- unsigned char C = Name[i];
if (!isalnum(static_cast<unsigned char>(C)) && C != '-' && C != '.' &&
C != '_') {
NeedsQuotes = true;
@@ -1309,27 +1308,8 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Metadata *MD,
bool FromValue = false);
static void WriteOptimizationInfo(raw_ostream &Out, const User *U) {
- if (const FPMathOperator *FPO = dyn_cast<const FPMathOperator>(U)) {
- // 'Fast' is an abbreviation for all fast-math-flags.
- if (FPO->isFast())
- Out << " fast";
- else {
- if (FPO->hasAllowReassoc())
- Out << " reassoc";
- if (FPO->hasNoNaNs())
- Out << " nnan";
- if (FPO->hasNoInfs())
- Out << " ninf";
- if (FPO->hasNoSignedZeros())
- Out << " nsz";
- if (FPO->hasAllowReciprocal())
- Out << " arcp";
- if (FPO->hasAllowContract())
- Out << " contract";
- if (FPO->hasApproxFunc())
- Out << " afn";
- }
- }
+ if (const FPMathOperator *FPO = dyn_cast<const FPMathOperator>(U))
+ Out << FPO->getFastMathFlags();
if (const OverflowingBinaryOperator *OBO =
dyn_cast<OverflowingBinaryOperator>(U)) {
diff --git a/contrib/llvm-project/llvm/lib/IR/Core.cpp b/contrib/llvm-project/llvm/lib/IR/Core.cpp
index 905372982dc2..2c396ae97499 100644
--- a/contrib/llvm-project/llvm/lib/IR/Core.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Core.cpp
@@ -2266,6 +2266,14 @@ LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee,
unwrap<Constant>(Aliasee), unwrap(M)));
}
+LLVMValueRef LLVMAddAlias2(LLVMModuleRef M, LLVMTypeRef ValueTy,
+ unsigned AddrSpace, LLVMValueRef Aliasee,
+ const char *Name) {
+ return wrap(GlobalAlias::create(unwrap(ValueTy), AddrSpace,
+ GlobalValue::ExternalLinkage, Name,
+ unwrap<Constant>(Aliasee), unwrap(M)));
+}
+
LLVMValueRef LLVMGetNamedGlobalAlias(LLVMModuleRef M,
const char *Name, size_t NameLen) {
return wrap(unwrap(M)->getNamedAlias(Name));
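
LLVMAddAlias2 differs from the older LLVMAddAlias in taking the aliasee's value type and address space explicitly, which keeps the C API usable without pointee types. A minimal hedged sketch; `M` and `Fn` are assumptions (an existing LLVMModuleRef and a function LLVMValueRef), not taken from the patch:

#include "llvm-c/Core.h"

// Illustrative C-API usage from C++: alias "fn_alias" for Fn in address space 0.
LLVMTypeRef FnValueTy = LLVMGlobalGetValueType(Fn);
LLVMValueRef Alias =
    LLVMAddAlias2(M, FnValueTy, /*AddrSpace=*/0, Fn, "fn_alias");
(void)Alias; // The alias is created with ExternalLinkage, as in the code above.
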
diff --git a/contrib/llvm-project/llvm/lib/IR/DIBuilder.cpp b/contrib/llvm-project/llvm/lib/IR/DIBuilder.cpp
index ca7dafc814ce..548962bd6a98 100644
--- a/contrib/llvm-project/llvm/lib/IR/DIBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/DIBuilder.cpp
@@ -34,7 +34,20 @@ static cl::opt<bool>
DIBuilder::DIBuilder(Module &m, bool AllowUnresolvedNodes, DICompileUnit *CU)
: M(m), VMContext(M.getContext()), CUNode(CU), DeclareFn(nullptr),
ValueFn(nullptr), LabelFn(nullptr),
- AllowUnresolvedNodes(AllowUnresolvedNodes) {}
+ AllowUnresolvedNodes(AllowUnresolvedNodes) {
+ if (CUNode) {
+ if (const auto &ETs = CUNode->getEnumTypes())
+ AllEnumTypes.assign(ETs.begin(), ETs.end());
+ if (const auto &RTs = CUNode->getRetainedTypes())
+ AllRetainTypes.assign(RTs.begin(), RTs.end());
+ if (const auto &GVs = CUNode->getGlobalVariables())
+ AllGVs.assign(GVs.begin(), GVs.end());
+ if (const auto &IMs = CUNode->getImportedEntities())
+ AllImportedModules.assign(IMs.begin(), IMs.end());
+ if (const auto &MNs = CUNode->getMacros())
+ AllMacrosPerParent.insert({nullptr, {MNs.begin(), MNs.end()}});
+ }
+}
void DIBuilder::trackIfUnresolved(MDNode *N) {
if (!N)
diff --git a/contrib/llvm-project/llvm/lib/IR/Instructions.cpp b/contrib/llvm-project/llvm/lib/IR/Instructions.cpp
index c42df49d97ea..ad27a6d8c08e 100644
--- a/contrib/llvm-project/llvm/lib/IR/Instructions.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Instructions.cpp
@@ -2474,7 +2474,7 @@ bool ShuffleVectorInst::isReplicationMask(ArrayRef<int> Mask,
// Additionally, mask size is a replication factor multiplied by vector size,
// which further significantly reduces the search space.
- // Before doing that, let's perform basic sanity check first.
+ // Before doing that, let's perform basic correctness checking first.
int Largest = -1;
for (int MaskElt : Mask) {
if (MaskElt == UndefMaskElem)
diff --git a/contrib/llvm-project/llvm/lib/IR/IntrinsicInst.cpp b/contrib/llvm-project/llvm/lib/IR/IntrinsicInst.cpp
index 7552906fd07a..9206cd37a6d1 100644
--- a/contrib/llvm-project/llvm/lib/IR/IntrinsicInst.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/IntrinsicInst.cpp
@@ -358,13 +358,13 @@ Value *VPIntrinsic::getMemoryPointerParam() const {
Optional<unsigned> VPIntrinsic::getMemoryPointerParamPos(Intrinsic::ID VPID) {
switch (VPID) {
default:
- return None;
-
-#define HANDLE_VP_IS_MEMOP(VPID, POINTERPOS, DATAPOS) \
- case Intrinsic::VPID: \
- return POINTERPOS;
+ break;
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
+#define VP_PROPERTY_MEMOP(POINTERPOS, ...) return POINTERPOS;
+#define END_REGISTER_VP_INTRINSIC(VPID) break;
#include "llvm/IR/VPIntrinsics.def"
}
+ return None;
}
/// \return The data (payload) operand of this store or scatter.
@@ -378,52 +378,51 @@ Value *VPIntrinsic::getMemoryDataParam() const {
Optional<unsigned> VPIntrinsic::getMemoryDataParamPos(Intrinsic::ID VPID) {
switch (VPID) {
default:
- return None;
-
-#define HANDLE_VP_IS_MEMOP(VPID, POINTERPOS, DATAPOS) \
- case Intrinsic::VPID: \
- return DATAPOS;
+ break;
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
+#define VP_PROPERTY_MEMOP(POINTERPOS, DATAPOS) return DATAPOS;
+#define END_REGISTER_VP_INTRINSIC(VPID) break;
#include "llvm/IR/VPIntrinsics.def"
}
+ return None;
}
bool VPIntrinsic::isVPIntrinsic(Intrinsic::ID ID) {
switch (ID) {
default:
- return false;
-
+ break;
#define BEGIN_REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS) \
case Intrinsic::VPID: \
- break;
+ return true;
#include "llvm/IR/VPIntrinsics.def"
}
- return true;
+ return false;
}
// Equivalent non-predicated opcode
Optional<unsigned> VPIntrinsic::getFunctionalOpcodeForVP(Intrinsic::ID ID) {
- Optional<unsigned> FunctionalOC;
switch (ID) {
default:
break;
#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
-#define HANDLE_VP_TO_OPC(OPC) FunctionalOC = Instruction::OPC;
-#define END_REGISTER_VP_INTRINSIC(...) break;
+#define VP_PROPERTY_FUNCTIONAL_OPC(OPC) return Instruction::OPC;
+#define END_REGISTER_VP_INTRINSIC(VPID) break;
#include "llvm/IR/VPIntrinsics.def"
}
-
- return FunctionalOC;
+ return None;
}
Intrinsic::ID VPIntrinsic::getForOpcode(unsigned IROPC) {
switch (IROPC) {
default:
- return Intrinsic::not_intrinsic;
+ break;
-#define HANDLE_VP_TO_OPC(OPC) case Instruction::OPC:
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) break;
+#define VP_PROPERTY_FUNCTIONAL_OPC(OPC) case Instruction::OPC:
#define END_REGISTER_VP_INTRINSIC(VPID) return Intrinsic::VPID;
#include "llvm/IR/VPIntrinsics.def"
}
+ return Intrinsic::not_intrinsic;
}
bool VPIntrinsic::canIgnoreVectorLengthParam() const {
@@ -516,13 +515,13 @@ Function *VPIntrinsic::getDeclarationForParams(Module *M, Intrinsic::ID VPID,
bool VPReductionIntrinsic::isVPReduction(Intrinsic::ID ID) {
switch (ID) {
default:
- return false;
-#define HANDLE_VP_REDUCTION(VPID, STARTPOS, VECTORPOS) \
- case Intrinsic::VPID: \
break;
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
+#define VP_PROPERTY_REDUCTION(STARTPOS, ...) return true;
+#define END_REGISTER_VP_INTRINSIC(VPID) break;
#include "llvm/IR/VPIntrinsics.def"
}
- return true;
+ return false;
}
unsigned VPReductionIntrinsic::getVectorParamPos() const {
@@ -535,24 +534,26 @@ unsigned VPReductionIntrinsic::getStartParamPos() const {
Optional<unsigned> VPReductionIntrinsic::getVectorParamPos(Intrinsic::ID ID) {
switch (ID) {
-#define HANDLE_VP_REDUCTION(VPID, STARTPOS, VECTORPOS) \
- case Intrinsic::VPID: \
- return VECTORPOS;
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
+#define VP_PROPERTY_REDUCTION(STARTPOS, VECTORPOS) return VECTORPOS;
+#define END_REGISTER_VP_INTRINSIC(VPID) break;
#include "llvm/IR/VPIntrinsics.def"
default:
- return None;
+ break;
}
+ return None;
}
Optional<unsigned> VPReductionIntrinsic::getStartParamPos(Intrinsic::ID ID) {
switch (ID) {
-#define HANDLE_VP_REDUCTION(VPID, STARTPOS, VECTORPOS) \
- case Intrinsic::VPID: \
- return STARTPOS;
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
+#define VP_PROPERTY_REDUCTION(STARTPOS, VECTORPOS) return STARTPOS;
+#define END_REGISTER_VP_INTRINSIC(VPID) break;
#include "llvm/IR/VPIntrinsics.def"
default:
- return None;
+ break;
}
+ return None;
}
Instruction::BinaryOps BinaryOpIntrinsic::getBinaryOp() const {
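
The switch bodies above are generated entirely by macro expansion over VPIntrinsics.def. To make the new VP_PROPERTY_* scheme concrete, here is roughly how a single entry expands; the vp.add arguments are an assumption for illustration, not quoted from the .def file:

// Assumed .def entry (illustrative):
//   BEGIN_REGISTER_VP_INTRINSIC(vp_add, /*MASKPOS=*/2, /*VLENPOS=*/3)
//   VP_PROPERTY_FUNCTIONAL_OPC(Add)
//   END_REGISTER_VP_INTRINSIC(vp_add)
//
// With the macros defined in getFunctionalOpcodeForVP, that entry expands to:
//   case Intrinsic::vp_add:
//     return Instruction::Add;
//     break;
//
// and with the macros defined in getForOpcode it expands to the inverse:
//   break;
//   case Instruction::Add:
//     return Intrinsic::vp_add;
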
diff --git a/contrib/llvm-project/llvm/lib/IR/Operator.cpp b/contrib/llvm-project/llvm/lib/IR/Operator.cpp
index cf309ffd6212..d15fcfbc5b9f 100644
--- a/contrib/llvm-project/llvm/lib/IR/Operator.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Operator.cpp
@@ -226,4 +226,25 @@ bool GEPOperator::collectOffset(
}
return true;
}
+
+void FastMathFlags::print(raw_ostream &O) const {
+ if (all())
+ O << " fast";
+ else {
+ if (allowReassoc())
+ O << " reassoc";
+ if (noNaNs())
+ O << " nnan";
+ if (noInfs())
+ O << " ninf";
+ if (noSignedZeros())
+ O << " nsz";
+ if (allowReciprocal())
+ O << " arcp";
+ if (allowContract())
+ O << " contract";
+ if (approxFunc())
+ O << " afn";
+ }
+}
} // namespace llvm
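
Since the flag names now print through FastMathFlags itself (and, per the AsmWriter change above, through streaming the flags), the same textual form is available outside the printer. A small hedged sketch, assuming the matching declaration was added alongside this definition:

#include "llvm/IR/Operator.h"
#include "llvm/Support/raw_ostream.h"

void dumpSomeFlags() {
  llvm::FastMathFlags FMF;
  FMF.setAllowReassoc();
  FMF.setNoNaNs();
  // Given the ordering in print() above, this is expected to emit
  // " reassoc nnan" (leading space included).
  FMF.print(llvm::errs());
  llvm::errs() << "\n";
}
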
diff --git a/contrib/llvm-project/llvm/lib/IR/PassTimingInfo.cpp b/contrib/llvm-project/llvm/lib/IR/PassTimingInfo.cpp
index d0c1517f480b..a03fafec9fac 100644
--- a/contrib/llvm-project/llvm/lib/IR/PassTimingInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/PassTimingInfo.cpp
@@ -187,7 +187,7 @@ Timer &TimePassesHandler::getPassTimer(StringRef PassID) {
Timer *T = new Timer(PassID, FullDesc, TG);
Timers.emplace_back(T);
- assert(Count == Timers.size() && "sanity check");
+ assert(Count == Timers.size() && "Timers vector not adjusted correctly.");
return *T;
}
diff --git a/contrib/llvm-project/llvm/lib/IR/SafepointIRVerifier.cpp b/contrib/llvm-project/llvm/lib/IR/SafepointIRVerifier.cpp
index 9be6de693ee3..2117527a64f0 100644
--- a/contrib/llvm-project/llvm/lib/IR/SafepointIRVerifier.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/SafepointIRVerifier.cpp
@@ -6,9 +6,9 @@
//
//===----------------------------------------------------------------------===//
//
-// Run a sanity check on the IR to ensure that Safepoints - if they've been
-// inserted - were inserted correctly. In particular, look for use of
-// non-relocated values after a safepoint. It's primary use is to check the
+// Run a basic correctness check on the IR to ensure that Safepoints - if
+// they've been inserted - were inserted correctly. In particular, look for use
+// of non-relocated values after a safepoint. Its primary use is to check the
// correctness of safepoint insertion immediately after insertion, but it can
// also be used to verify that later transforms have not found a way to break
// safepoint semantics.
diff --git a/contrib/llvm-project/llvm/lib/IR/Verifier.cpp b/contrib/llvm-project/llvm/lib/IR/Verifier.cpp
index dc4370d4b6ed..154b59835b01 100644
--- a/contrib/llvm-project/llvm/lib/IR/Verifier.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Verifier.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
//
// This file defines the function verifier interface, that can be used for some
-// sanity checking of input to the system.
+// basic correctness checking of input to the system.
//
// Note that this does not provide full `Java style' security and verifications,
// instead it just tries to ensure that code is well-formed.
@@ -1604,7 +1604,7 @@ Verifier::visitModuleFlag(const MDNode *Op,
Assert(ID, "invalid ID operand in module flag (expected metadata string)",
Op->getOperand(1));
- // Sanity check the values for behaviors with additional requirements.
+ // Check the values for behaviors with additional requirements.
switch (MFB) {
case Module::Error:
case Module::Warning:
@@ -5269,24 +5269,32 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
Op0ElemTy =
cast<VectorType>(Call.getArgOperand(0)->getType())->getElementType();
break;
- case Intrinsic::matrix_column_major_load:
+ case Intrinsic::matrix_column_major_load: {
Stride = dyn_cast<ConstantInt>(Call.getArgOperand(1));
NumRows = cast<ConstantInt>(Call.getArgOperand(3));
NumColumns = cast<ConstantInt>(Call.getArgOperand(4));
ResultTy = cast<VectorType>(Call.getType());
- Op0ElemTy =
- cast<PointerType>(Call.getArgOperand(0)->getType())->getElementType();
+
+ PointerType *Op0PtrTy =
+ cast<PointerType>(Call.getArgOperand(0)->getType());
+ if (!Op0PtrTy->isOpaque())
+ Op0ElemTy = Op0PtrTy->getElementType();
break;
- case Intrinsic::matrix_column_major_store:
+ }
+ case Intrinsic::matrix_column_major_store: {
Stride = dyn_cast<ConstantInt>(Call.getArgOperand(2));
NumRows = cast<ConstantInt>(Call.getArgOperand(4));
NumColumns = cast<ConstantInt>(Call.getArgOperand(5));
ResultTy = cast<VectorType>(Call.getArgOperand(0)->getType());
Op0ElemTy =
cast<VectorType>(Call.getArgOperand(0)->getType())->getElementType();
- Op1ElemTy =
- cast<PointerType>(Call.getArgOperand(1)->getType())->getElementType();
+
+ PointerType *Op1PtrTy =
+ cast<PointerType>(Call.getArgOperand(1)->getType());
+ if (!Op1PtrTy->isOpaque())
+ Op1ElemTy = Op1PtrTy->getElementType();
break;
+ }
default:
llvm_unreachable("unexpected intrinsic");
}
@@ -5295,9 +5303,10 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
ResultTy->getElementType()->isFloatingPointTy(),
"Result type must be an integer or floating-point type!", IF);
- Assert(ResultTy->getElementType() == Op0ElemTy,
- "Vector element type mismatch of the result and first operand "
- "vector!", IF);
+ if (Op0ElemTy)
+ Assert(ResultTy->getElementType() == Op0ElemTy,
+ "Vector element type mismatch of the result and first operand "
+ "vector!", IF);
if (Op1ElemTy)
Assert(ResultTy->getElementType() == Op1ElemTy,
diff --git a/contrib/llvm-project/llvm/lib/InterfaceStub/ELFObjHandler.cpp b/contrib/llvm-project/llvm/lib/InterfaceStub/ELFObjHandler.cpp
index d41c7d3217d7..0d1a864f31ac 100644
--- a/contrib/llvm-project/llvm/lib/InterfaceStub/ELFObjHandler.cpp
+++ b/contrib/llvm-project/llvm/lib/InterfaceStub/ELFObjHandler.cpp
@@ -372,7 +372,7 @@ Error appendToError(Error Err, StringRef After) {
/// This function populates a DynamicEntries struct using an ELFT::DynRange.
/// After populating the struct, the members are validated with
-/// some basic sanity checks.
+/// some basic correctness checks.
///
/// @param Dyn Target DynamicEntries struct to populate.
/// @param DynTable Source dynamic table.
diff --git a/contrib/llvm-project/llvm/lib/MC/MCAsmStreamer.cpp b/contrib/llvm-project/llvm/lib/MC/MCAsmStreamer.cpp
index 154b2d051f34..2ca921017171 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCAsmStreamer.cpp
@@ -1069,16 +1069,14 @@ void MCAsmStreamer::PrintQuotedString(StringRef Data, raw_ostream &OS) const {
OS << '"';
if (MAI->hasPairedDoubleQuoteStringConstants()) {
- for (unsigned i = 0, e = Data.size(); i != e; ++i) {
- unsigned char C = Data[i];
+ for (unsigned char C : Data) {
if (C == '"')
OS << "\"\"";
else
OS << (char)C;
}
} else {
- for (unsigned i = 0, e = Data.size(); i != e; ++i) {
- unsigned char C = Data[i];
+ for (unsigned char C : Data) {
if (C == '"' || C == '\\') {
OS << '\\' << (char)C;
continue;
diff --git a/contrib/llvm-project/llvm/lib/MC/MCELFStreamer.cpp b/contrib/llvm-project/llvm/lib/MC/MCELFStreamer.cpp
index 1ba999a63113..fbf3c860368a 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCELFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCELFStreamer.cpp
@@ -646,8 +646,6 @@ void MCELFStreamer::emitBundleAlignMode(unsigned AlignPow2) {
void MCELFStreamer::emitBundleLock(bool AlignToEnd) {
MCSection &Sec = *getCurrentSectionOnly();
- // Sanity checks
- //
if (!getAssembler().isBundlingEnabled())
report_fatal_error(".bundle_lock forbidden when bundling is disabled");
@@ -667,7 +665,6 @@ void MCELFStreamer::emitBundleLock(bool AlignToEnd) {
void MCELFStreamer::emitBundleUnlock() {
MCSection &Sec = *getCurrentSectionOnly();
- // Sanity checks
if (!getAssembler().isBundlingEnabled())
report_fatal_error(".bundle_unlock forbidden when bundling is disabled");
else if (!isBundleLocked())
diff --git a/contrib/llvm-project/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/contrib/llvm-project/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index ddc41d0a08ab..e95019c12db7 100644
--- a/contrib/llvm-project/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -676,14 +676,14 @@ EndStmt:
getContext().getELFSection(SectionName, Type, Flags, Size, GroupName,
IsComdat, UniqueID, LinkedToSym);
getStreamer().SwitchSection(Section, Subsection);
- if (Section->getType() != Type &&
+ // Check that flags are used consistently. However, the GNU assembler permits
+ // them to be left out in subsequent uses of the same section; for
+ // compatibility, do likewise.
+ if (!TypeName.empty() && Section->getType() != Type &&
!allowSectionTypeMismatch(getContext().getTargetTriple(), SectionName,
Type))
Error(loc, "changed section type for " + SectionName + ", expected: 0x" +
utohexstr(Section->getType()));
- // Check that flags are used consistently. However, the GNU assembler permits
- // to leave out in subsequent uses of the same sections; for compatibility,
- // do likewise.
if ((extraFlags || Size || !TypeName.empty()) && Section->getFlags() != Flags)
Error(loc, "changed section flags for " + SectionName + ", expected: 0x" +
utohexstr(Section->getFlags()));
diff --git a/contrib/llvm-project/llvm/lib/MC/WinCOFFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/MC/WinCOFFObjectWriter.cpp
index 646f416821ae..73c687331d30 100644
--- a/contrib/llvm-project/llvm/lib/MC/WinCOFFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/WinCOFFObjectWriter.cpp
@@ -56,6 +56,8 @@ using llvm::support::endian::write32le;
namespace {
+constexpr int OffsetLabelIntervalBits = 20;
+
using name = SmallString<COFF::NameSize>;
enum AuxiliaryType {
@@ -120,6 +122,8 @@ public:
relocations Relocations;
COFFSection(StringRef Name) : Name(std::string(Name)) {}
+
+ SmallVector<COFFSymbol *, 1> OffsetSymbols;
};
class WinCOFFObjectWriter : public MCObjectWriter {
@@ -149,6 +153,7 @@ public:
symbol_list WeakDefaults;
bool UseBigObj;
+ bool UseOffsetLabels = false;
bool EmitAddrsigSection = false;
MCSectionCOFF *AddrsigSection;
@@ -174,7 +179,7 @@ public:
COFFSymbol *GetOrCreateCOFFSymbol(const MCSymbol *Symbol);
COFFSection *createSection(StringRef Name);
- void defineSection(MCSectionCOFF const &Sec);
+ void defineSection(MCSectionCOFF const &Sec, const MCAsmLayout &Layout);
COFFSymbol *getLinkedSymbol(const MCSymbol &Symbol);
void DefineSymbol(const MCSymbol &Symbol, MCAssembler &Assembler,
@@ -244,6 +249,11 @@ WinCOFFObjectWriter::WinCOFFObjectWriter(
std::unique_ptr<MCWinCOFFObjectTargetWriter> MOTW, raw_pwrite_stream &OS)
: W(OS, support::little), TargetObjectWriter(std::move(MOTW)) {
Header.Machine = TargetObjectWriter->getMachine();
+ // Some relocations on ARM64 (the 21 bit ADRP relocations) have a slightly
+ // limited range for the immediate offset (+/- 1 MB); create extra offset
+ // label symbols with regular intervals to allow referencing a
+ // non-temporary symbol that is close enough.
+ UseOffsetLabels = Header.Machine == COFF::IMAGE_FILE_MACHINE_ARM64;
}
COFFSymbol *WinCOFFObjectWriter::createSymbol(StringRef Name) {
@@ -299,7 +309,8 @@ static uint32_t getAlignment(const MCSectionCOFF &Sec) {
/// This function takes a section data object from the assembler
/// and creates the associated COFF section staging object.
-void WinCOFFObjectWriter::defineSection(const MCSectionCOFF &MCSec) {
+void WinCOFFObjectWriter::defineSection(const MCSectionCOFF &MCSec,
+ const MCAsmLayout &Layout) {
COFFSection *Section = createSection(MCSec.getName());
COFFSymbol *Symbol = createSymbol(MCSec.getName());
Section->Symbol = Symbol;
@@ -329,6 +340,20 @@ void WinCOFFObjectWriter::defineSection(const MCSectionCOFF &MCSec) {
// Bind internal COFF section to MC section.
Section->MCSection = &MCSec;
SectionMap[&MCSec] = Section;
+
+ if (UseOffsetLabels && !MCSec.getFragmentList().empty()) {
+ const uint32_t Interval = 1 << OffsetLabelIntervalBits;
+ uint32_t N = 1;
+ for (uint32_t Off = Interval, E = Layout.getSectionAddressSize(&MCSec);
+ Off < E; Off += Interval) {
+ auto Name = ("$L" + MCSec.getName() + "_" + Twine(N++)).str();
+ COFFSymbol *Label = createSymbol(Name);
+ Label->Section = Section;
+ Label->Data.StorageClass = COFF::IMAGE_SYM_CLASS_LABEL;
+ Label->Data.Value = Off;
+ Section->OffsetSymbols.push_back(Label);
+ }
+ }
}
static uint64_t getSymbolValue(const MCSymbol &Symbol,
@@ -688,7 +713,7 @@ void WinCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
// "Define" each section & symbol. This creates section & symbol
// entries in the staging area.
for (const auto &Section : Asm)
- defineSection(static_cast<const MCSectionCOFF &>(Section));
+ defineSection(static_cast<const MCSectionCOFF &>(Section), Layout);
for (const MCSymbol &Symbol : Asm.symbols())
if (!Symbol.isTemporary())
@@ -774,8 +799,23 @@ void WinCOFFObjectWriter::recordRelocation(MCAssembler &Asm,
assert(
SectionMap.find(TargetSection) != SectionMap.end() &&
"Section must already have been defined in executePostLayoutBinding!");
- Reloc.Symb = SectionMap[TargetSection]->Symbol;
+ COFFSection *Section = SectionMap[TargetSection];
+ Reloc.Symb = Section->Symbol;
FixedValue += Layout.getSymbolOffset(A);
+ // Technically, we should do the final adjustments of FixedValue (below)
+ // before picking an offset symbol, otherwise we might choose one which
+ // is slightly too far away. The relocations where it really matters
+ // (arm64 adrp relocations) don't get any offset though.
+ if (UseOffsetLabels && !Section->OffsetSymbols.empty()) {
+ uint64_t LabelIndex = FixedValue >> OffsetLabelIntervalBits;
+ if (LabelIndex > 0) {
+ if (LabelIndex <= Section->OffsetSymbols.size())
+ Reloc.Symb = Section->OffsetSymbols[LabelIndex - 1];
+ else
+ Reloc.Symb = Section->OffsetSymbols.back();
+ FixedValue -= Reloc.Symb->Data.Value;
+ }
+ }
} else {
assert(
SymbolMap.find(&A) != SymbolMap.end() &&
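
A worked example of the offset-label arithmetic above, using the 20-bit interval defined earlier (1 << 20 = 0x100000 bytes) and an assumed fixup location:

//   FixedValue   = 0x250000   (symbol offset within its section, assumed)
//   LabelIndex   = 0x250000 >> OffsetLabelIntervalBits = 2
//   Reloc.Symb   = OffsetSymbols[1]   ("$L<section>_2", Data.Value = 0x200000)
//   FixedValue  -= 0x200000  ->  0x50000
// The remaining displacement stays within the +/- 1 MB reach described in the
// ARM64 ADRP comment above, which is the whole point of the extra labels.
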
diff --git a/contrib/llvm-project/llvm/lib/MCA/InstrBuilder.cpp b/contrib/llvm-project/llvm/lib/MCA/InstrBuilder.cpp
index 0ab845a4c28f..d8283f8d2682 100644
--- a/contrib/llvm-project/llvm/lib/MCA/InstrBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/MCA/InstrBuilder.cpp
@@ -612,7 +612,7 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');
- // Sanity check on the instruction descriptor.
+ // Validation check on the instruction descriptor.
if (Error Err = verifyInstrDesc(*ID, MCI))
return std::move(Err);
diff --git a/contrib/llvm-project/llvm/lib/MCA/Stages/ExecuteStage.cpp b/contrib/llvm-project/llvm/lib/MCA/Stages/ExecuteStage.cpp
index 6e021d3d9232..2b11f73b19df 100644
--- a/contrib/llvm-project/llvm/lib/MCA/Stages/ExecuteStage.cpp
+++ b/contrib/llvm-project/llvm/lib/MCA/Stages/ExecuteStage.cpp
@@ -188,7 +188,7 @@ Error ExecuteStage::execute(InstRef &IR) {
#ifndef NDEBUG
// Ensure that the HWS has not stored this instruction in its queues.
- HWS.sanityCheck(IR);
+ HWS.instructionCheck(IR);
#endif
if (IR.getInstruction()->isEliminated())
diff --git a/contrib/llvm-project/llvm/lib/Object/ELFObjectFile.cpp b/contrib/llvm-project/llvm/lib/Object/ELFObjectFile.cpp
index 50035d6c7523..cf1f12d9a9a7 100644
--- a/contrib/llvm-project/llvm/lib/Object/ELFObjectFile.cpp
+++ b/contrib/llvm-project/llvm/lib/Object/ELFObjectFile.cpp
@@ -682,7 +682,7 @@ readDynsymVersionsImpl(const ELFFile<ELFT> &EF,
std::vector<VersionEntry> Ret;
size_t I = 0;
- for (auto It = Symbols.begin(), E = Symbols.end(); It != E; ++It) {
+ for (const ELFSymbolRef &Sym : Symbols) {
++I;
Expected<const typename ELFT::Versym *> VerEntryOrErr =
EF.template getEntry<typename ELFT::Versym>(*VerSec, I);
@@ -691,7 +691,7 @@ readDynsymVersionsImpl(const ELFFile<ELFT> &EF,
" from " + describe(EF, *VerSec) + ": " +
toString(VerEntryOrErr.takeError()));
- Expected<uint32_t> FlagsOrErr = It->getFlags();
+ Expected<uint32_t> FlagsOrErr = Sym.getFlags();
if (!FlagsOrErr)
return createError("unable to read flags for symbol with index " +
Twine(I) + ": " + toString(FlagsOrErr.takeError()));
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/COFFEmitter.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/COFFEmitter.cpp
index 5f38ca13cfc2..66ad16db1ba4 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/COFFEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/COFFEmitter.cpp
@@ -476,29 +476,25 @@ static bool writeCOFF(COFFParser &CP, raw_ostream &OS) {
assert(OS.tell() == CP.SectionTableStart);
// Output section table.
- for (std::vector<COFFYAML::Section>::iterator i = CP.Obj.Sections.begin(),
- e = CP.Obj.Sections.end();
- i != e; ++i) {
- OS.write(i->Header.Name, COFF::NameSize);
- OS << binary_le(i->Header.VirtualSize)
- << binary_le(i->Header.VirtualAddress)
- << binary_le(i->Header.SizeOfRawData)
- << binary_le(i->Header.PointerToRawData)
- << binary_le(i->Header.PointerToRelocations)
- << binary_le(i->Header.PointerToLineNumbers)
- << binary_le(i->Header.NumberOfRelocations)
- << binary_le(i->Header.NumberOfLineNumbers)
- << binary_le(i->Header.Characteristics);
+ for (const COFFYAML::Section &S : CP.Obj.Sections) {
+ OS.write(S.Header.Name, COFF::NameSize);
+ OS << binary_le(S.Header.VirtualSize)
+ << binary_le(S.Header.VirtualAddress)
+ << binary_le(S.Header.SizeOfRawData)
+ << binary_le(S.Header.PointerToRawData)
+ << binary_le(S.Header.PointerToRelocations)
+ << binary_le(S.Header.PointerToLineNumbers)
+ << binary_le(S.Header.NumberOfRelocations)
+ << binary_le(S.Header.NumberOfLineNumbers)
+ << binary_le(S.Header.Characteristics);
}
assert(OS.tell() == CP.SectionTableStart + CP.SectionTableSize);
unsigned CurSymbol = 0;
StringMap<unsigned> SymbolTableIndexMap;
- for (std::vector<COFFYAML::Symbol>::iterator I = CP.Obj.Symbols.begin(),
- E = CP.Obj.Symbols.end();
- I != E; ++I) {
- SymbolTableIndexMap[I->Name] = CurSymbol;
- CurSymbol += 1 + I->Header.NumberOfAuxSymbols;
+ for (const COFFYAML::Symbol &Sym : CP.Obj.Symbols) {
+ SymbolTableIndexMap[Sym.Name] = CurSymbol;
+ CurSymbol += 1 + Sym.Header.NumberOfAuxSymbols;
}
// Output section data.
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/ELFYAML.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/ELFYAML.cpp
index fdf9aeae1622..e0dde4433d24 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -155,6 +155,10 @@ void ScalarEnumerationTraits<ELFYAML::ELF_NT>::enumeration(
ECase(NT_FREEBSD_PROCSTAT_OSREL);
ECase(NT_FREEBSD_PROCSTAT_PSSTRINGS);
ECase(NT_FREEBSD_PROCSTAT_AUXV);
+ // NetBSD core note types.
+ ECase(NT_NETBSDCORE_PROCINFO);
+ ECase(NT_NETBSDCORE_AUXV);
+ ECase(NT_NETBSDCORE_LWPSTATUS);
// OpenBSD core note types.
ECase(NT_OPENBSD_PROCINFO);
ECase(NT_OPENBSD_AUXV);
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/MachOEmitter.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/MachOEmitter.cpp
index c653c29ec9a7..e5ffb12df434 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/MachOEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/MachOEmitter.cpp
@@ -54,6 +54,7 @@ private:
void writeNameList(raw_ostream &OS);
void writeStringTable(raw_ostream &OS);
void writeExportTrie(raw_ostream &OS);
+ void writeDynamicSymbolTable(raw_ostream &OS);
void dumpExportEntry(raw_ostream &OS, MachOYAML::ExportEntry &Entry);
void ZeroToOffset(raw_ostream &OS, size_t offset);
@@ -482,6 +483,7 @@ void MachOWriter::writeLinkEditData(raw_ostream &OS) {
MachO::dyld_info_command *DyldInfoOnlyCmd = 0;
MachO::symtab_command *SymtabCmd = 0;
+ MachO::dysymtab_command *DSymtabCmd = 0;
for (auto &LC : Obj.LoadCommands) {
switch (LC.Data.load_command_data.cmd) {
case MachO::LC_SYMTAB:
@@ -504,6 +506,11 @@ void MachOWriter::writeLinkEditData(raw_ostream &OS) {
WriteQueue.push_back(std::make_pair(DyldInfoOnlyCmd->export_off,
&MachOWriter::writeExportTrie));
break;
+ case MachO::LC_DYSYMTAB:
+ DSymtabCmd = &LC.Data.dysymtab_command_data;
+ WriteQueue.push_back(std::make_pair(
+ DSymtabCmd->indirectsymoff, &MachOWriter::writeDynamicSymbolTable));
+ break;
}
}
@@ -556,6 +563,12 @@ void MachOWriter::writeStringTable(raw_ostream &OS) {
}
}
+void MachOWriter::writeDynamicSymbolTable(raw_ostream &OS) {
+ for (auto Data : Obj.LinkEdit.IndirectSymbols)
+ OS.write(reinterpret_cast<const char *>(&Data),
+ sizeof(yaml::Hex32::BaseType));
+}
+
class UniversalWriter {
public:
UniversalWriter(yaml::YamlObjectFile &ObjectFile)
diff --git a/contrib/llvm-project/llvm/lib/ObjectYAML/MachOYAML.cpp b/contrib/llvm-project/llvm/lib/ObjectYAML/MachOYAML.cpp
index c9562bd72258..f32009458110 100644
--- a/contrib/llvm-project/llvm/lib/ObjectYAML/MachOYAML.cpp
+++ b/contrib/llvm-project/llvm/lib/ObjectYAML/MachOYAML.cpp
@@ -164,6 +164,7 @@ void MappingTraits<MachOYAML::LinkEditData>::mapping(
IO.mapOptional("ExportTrie", LinkEditData.ExportTrie);
IO.mapOptional("NameList", LinkEditData.NameList);
IO.mapOptional("StringTable", LinkEditData.StringTable);
+ IO.mapOptional("IndirectSymbols", LinkEditData.IndirectSymbols);
}
void MappingTraits<MachOYAML::RebaseOpcode>::mapping(
diff --git a/contrib/llvm-project/llvm/lib/Option/OptTable.cpp b/contrib/llvm-project/llvm/lib/Option/OptTable.cpp
index 37c2fcbab181..19e05b9272bb 100644
--- a/contrib/llvm-project/llvm/lib/Option/OptTable.cpp
+++ b/contrib/llvm-project/llvm/lib/Option/OptTable.cpp
@@ -150,10 +150,9 @@ OptTable::OptTable(ArrayRef<Info> OptionInfos, bool IgnoreCase)
for (StringSet<>::const_iterator I = PrefixesUnion.begin(),
E = PrefixesUnion.end(); I != E; ++I) {
StringRef Prefix = I->getKey();
- for (StringRef::const_iterator C = Prefix.begin(), CE = Prefix.end();
- C != CE; ++C)
- if (!is_contained(PrefixChars, *C))
- PrefixChars.push_back(*C);
+ for (char C : Prefix)
+ if (!is_contained(PrefixChars, C))
+ PrefixChars.push_back(C);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Passes/PassBuilderPipelines.cpp b/contrib/llvm-project/llvm/lib/Passes/PassBuilderPipelines.cpp
index ac5dfdbdd540..de1b0ace7876 100644
--- a/contrib/llvm-project/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/contrib/llvm-project/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1765,6 +1765,8 @@ ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
if (LTOPreLink)
addRequiredLTOPreLinkPasses(MPM);
+ MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
+
return MPM;
}
diff --git a/contrib/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp b/contrib/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp
index 8e6be6730ea4..27a6c519ff82 100644
--- a/contrib/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp
+++ b/contrib/llvm-project/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -225,8 +225,8 @@ std::string doSystemDiff(StringRef Before, StringRef After,
return "Unable to read result.";
// Clean up.
- for (unsigned I = 0; I < NumFiles; ++I) {
- std::error_code EC = sys::fs::remove(FileName[I]);
+ for (const std::string &I : FileName) {
+ std::error_code EC = sys::fs::remove(I);
if (EC)
return "Unable to remove temporary file.";
}
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
index 94bd4807041d..c6691e321b3c 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
@@ -83,7 +83,6 @@ Error RawCoverageReader::readIntMax(uint64_t &Result, uint64_t MaxPlus1) {
Error RawCoverageReader::readSize(uint64_t &Result) {
if (auto Err = readULEB128(Result))
return Err;
- // Sanity check the number.
if (Result > Data.size())
return make_error<CoverageMapError>(coveragemap_error::malformed);
return Error::success();
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/InstrProf.cpp b/contrib/llvm-project/llvm/lib/ProfileData/InstrProf.cpp
index 1168ad27fe52..ab3487ecffe8 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/InstrProf.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/InstrProf.cpp
@@ -657,19 +657,18 @@ void InstrProfValueSiteRecord::merge(InstrProfValueSiteRecord &Input,
Input.sortByTargetValues();
auto I = ValueData.begin();
auto IE = ValueData.end();
- for (auto J = Input.ValueData.begin(), JE = Input.ValueData.end(); J != JE;
- ++J) {
- while (I != IE && I->Value < J->Value)
+ for (const InstrProfValueData &J : Input.ValueData) {
+ while (I != IE && I->Value < J.Value)
++I;
- if (I != IE && I->Value == J->Value) {
+ if (I != IE && I->Value == J.Value) {
bool Overflowed;
- I->Count = SaturatingMultiplyAdd(J->Count, Weight, I->Count, &Overflowed);
+ I->Count = SaturatingMultiplyAdd(J.Count, Weight, I->Count, &Overflowed);
if (Overflowed)
Warn(instrprof_error::counter_overflow);
++I;
continue;
}
- ValueData.insert(I, *J);
+ ValueData.insert(I, J);
}
}
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp b/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp
index b4e8025dbef9..885c1fe49240 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -62,7 +62,6 @@ InstrProfReader::create(const Twine &Path) {
Expected<std::unique_ptr<InstrProfReader>>
InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
- // Sanity check the buffer.
if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max())
return make_error<InstrProfError>(instrprof_error::too_large);
@@ -113,7 +112,6 @@ IndexedInstrProfReader::create(const Twine &Path, const Twine &RemappingPath) {
Expected<std::unique_ptr<IndexedInstrProfReader>>
IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
std::unique_ptr<MemoryBuffer> RemappingBuffer) {
- // Sanity check the buffer.
if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max())
return make_error<InstrProfError>(instrprof_error::too_large);
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/RawMemProfReader.cpp b/contrib/llvm-project/llvm/lib/ProfileData/RawMemProfReader.cpp
new file mode 100644
index 000000000000..f8d13c74fac3
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/ProfileData/RawMemProfReader.cpp
@@ -0,0 +1,121 @@
+//===- RawMemProfReader.cpp - Instrumented memory profiling reader --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for reading MemProf profiling data.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cstdint>
+#include <type_traits>
+
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/MemProfData.inc"
+#include "llvm/ProfileData/RawMemProfReader.h"
+
+namespace llvm {
+namespace memprof {
+namespace {
+
+struct Summary {
+ uint64_t Version;
+ uint64_t TotalSizeBytes;
+ uint64_t NumSegments;
+ uint64_t NumMIBInfo;
+ uint64_t NumStackOffsets;
+};
+
+template <class T = uint64_t> inline T alignedRead(const char *Ptr) {
+ static_assert(std::is_pod<T>::value, "Not a pod type.");
+ assert(reinterpret_cast<size_t>(Ptr) % sizeof(T) == 0 && "Unaligned Read");
+ return *reinterpret_cast<const T *>(Ptr);
+}
+
+Summary computeSummary(const char *Start) {
+ auto *H = reinterpret_cast<const Header *>(Start);
+
+ // Check alignment while reading the number of items in each section.
+ return Summary{
+ H->Version,
+ H->TotalSize,
+ alignedRead(Start + H->SegmentOffset),
+ alignedRead(Start + H->MIBOffset),
+ alignedRead(Start + H->StackOffset),
+ };
+}
+
+} // namespace
+
+Expected<std::unique_ptr<RawMemProfReader>>
+RawMemProfReader::create(const Twine &Path) {
+ auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path, /*IsText=*/true);
+ if (std::error_code EC = BufferOr.getError())
+ return errorCodeToError(EC);
+
+ std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
+
+ if (Buffer->getBufferSize() == 0)
+ return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
+
+ if (!RawMemProfReader::hasFormat(*Buffer))
+ return make_error<InstrProfError>(instrprof_error::bad_magic);
+
+ if (Buffer->getBufferSize() < sizeof(Header)) {
+ return make_error<InstrProfError>(instrprof_error::truncated);
+ }
+
+ // The size of the buffer can be > header total size since we allow repeated
+ // serialization of memprof profiles to the same file.
+ uint64_t TotalSize = 0;
+ const char *Next = Buffer->getBufferStart();
+ while (Next < Buffer->getBufferEnd()) {
+ auto *H = reinterpret_cast<const Header *>(Next);
+ if (H->Version != MEMPROF_RAW_VERSION) {
+ return make_error<InstrProfError>(instrprof_error::unsupported_version);
+ }
+
+ TotalSize += H->TotalSize;
+ Next += H->TotalSize;
+ }
+
+ if (Buffer->getBufferSize() != TotalSize) {
+ return make_error<InstrProfError>(instrprof_error::malformed);
+ }
+
+ return std::make_unique<RawMemProfReader>(std::move(Buffer));
+}
+
+bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
+ if (Buffer.getBufferSize() < sizeof(uint64_t))
+ return false;
+ // Aligned read to sanity check that the buffer was allocated with at least 8b
+ // alignment.
+ const uint64_t Magic = alignedRead(Buffer.getBufferStart());
+ return Magic == MEMPROF_RAW_MAGIC_64;
+}
+
+void RawMemProfReader::printSummaries(raw_ostream &OS) const {
+ int Count = 0;
+ const char *Next = DataBuffer->getBufferStart();
+ while (Next < DataBuffer->getBufferEnd()) {
+ auto Summary = computeSummary(Next);
+ OS << "MemProf Profile " << ++Count << "\n";
+ OS << " Version: " << Summary.Version << "\n";
+ OS << " TotalSizeBytes: " << Summary.TotalSizeBytes << "\n";
+ OS << " NumSegments: " << Summary.NumSegments << "\n";
+ OS << " NumMIBInfo: " << Summary.NumMIBInfo << "\n";
+ OS << " NumStackOffsets: " << Summary.NumStackOffsets << "\n";
+ // TODO: Print the build ids once we can record them using the
+ // sanitizer_procmaps library for linux.
+
+ auto *H = reinterpret_cast<const Header *>(Next);
+ Next += H->TotalSize;
+ }
+}
+
+} // namespace memprof
+} // namespace llvm
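
Note on the header walk in RawMemProfReader::create above: each serialized profile begins with a Header that records its own TotalSize, so repeated dumps to the same file are handled by hopping from header to header and requiring the sizes to cover the buffer exactly. A standalone sketch of that walk follows; the field order in RawHeader is an assumption for illustration (the real layout lives in llvm/ProfileData/MemProfData.inc), and this is not the reader itself.

#include <cstddef>
#include <cstdint>

// Assumed field order, mirroring the fields the reader touches above.
struct RawHeader {
  uint64_t Magic, Version, TotalSize, SegmentOffset, MIBOffset, StackOffset;
};

// True when the buffer is exactly a sequence of whole serialized profiles.
bool coversWholeBuffer(const char *Start, size_t Size) {
  uint64_t Total = 0;
  const char *Next = Start;
  while (Next < Start + Size) {
    auto *H = reinterpret_cast<const RawHeader *>(Next);
    Total += H->TotalSize; // every profile records its own size
    Next += H->TotalSize;  // hop to the next concatenated profile
  }
  return Total == Size;
}
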
diff --git a/contrib/llvm-project/llvm/lib/ProfileData/SampleProfReader.cpp b/contrib/llvm-project/llvm/lib/ProfileData/SampleProfReader.cpp
index c99a19020511..eefb7c2ba627 100644
--- a/contrib/llvm-project/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/contrib/llvm-project/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -1709,7 +1709,7 @@ setupMemoryBuffer(const Twine &Filename) {
return EC;
auto Buffer = std::move(BufferOrErr.get());
- // Sanity check the file.
+ // Check the file.
if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint32_t>::max())
return sampleprof_error::too_large;
diff --git a/contrib/llvm-project/llvm/lib/Support/AArch64TargetParser.cpp b/contrib/llvm-project/llvm/lib/Support/AArch64TargetParser.cpp
index b3136a91e7f5..a3e41ccd199c 100644
--- a/contrib/llvm-project/llvm/lib/Support/AArch64TargetParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/AArch64TargetParser.cpp
@@ -240,52 +240,4 @@ AArch64::ArchKind AArch64::parseCPUArch(StringRef CPU) {
return C.ArchID;
}
return ArchKind::INVALID;
-}
-
-// Parse a branch protection specification, which has the form
-// standard | none | [bti,pac-ret[+b-key,+leaf]*]
-// Returns true on success, with individual elements of the specification
-// returned in `PBP`. Returns false in error, with `Err` containing
-// an erroneous part of the spec.
-bool AArch64::parseBranchProtection(StringRef Spec, ParsedBranchProtection &PBP,
- StringRef &Err) {
- PBP = {"none", "a_key", false};
- if (Spec == "none")
- return true; // defaults are ok
-
- if (Spec == "standard") {
- PBP.Scope = "non-leaf";
- PBP.BranchTargetEnforcement = true;
- return true;
- }
-
- SmallVector<StringRef, 4> Opts;
- Spec.split(Opts, "+");
- for (int I = 0, E = Opts.size(); I != E; ++I) {
- StringRef Opt = Opts[I].trim();
- if (Opt == "bti") {
- PBP.BranchTargetEnforcement = true;
- continue;
- }
- if (Opt == "pac-ret") {
- PBP.Scope = "non-leaf";
- for (; I + 1 != E; ++I) {
- StringRef PACOpt = Opts[I + 1].trim();
- if (PACOpt == "leaf")
- PBP.Scope = "all";
- else if (PACOpt == "b-key")
- PBP.Key = "b_key";
- else
- break;
- }
- continue;
- }
- if (Opt == "")
- Err = "<empty>";
- else
- Err = Opt;
- return false;
- }
-
- return true;
-}
+}
\ No newline at end of file
diff --git a/contrib/llvm-project/llvm/lib/Support/ARMAttributeParser.cpp b/contrib/llvm-project/llvm/lib/Support/ARMAttributeParser.cpp
index 459691923af8..241cfb1eedbe 100644
--- a/contrib/llvm-project/llvm/lib/Support/ARMAttributeParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/ARMAttributeParser.cpp
@@ -59,6 +59,10 @@ const ARMAttributeParser::DisplayHandler ARMAttributeParser::displayRoutines[] =
ATTRIBUTE_HANDLER(DSP_extension),
ATTRIBUTE_HANDLER(T2EE_use),
ATTRIBUTE_HANDLER(Virtualization_use),
+ ATTRIBUTE_HANDLER(PAC_extension),
+ ATTRIBUTE_HANDLER(BTI_extension),
+ ATTRIBUTE_HANDLER(PACRET_use),
+ ATTRIBUTE_HANDLER(BTI_use),
ATTRIBUTE_HANDLER(nodefaults),
};
@@ -350,6 +354,28 @@ Error ARMAttributeParser::Virtualization_use(AttrType tag) {
return parseStringAttribute("Virtualization_use", tag, makeArrayRef(strings));
}
+Error ARMAttributeParser::PAC_extension(ARMBuildAttrs::AttrType tag) {
+ static const char *strings[] = {"Not Permitted", "Permitted in NOP space",
+ "Permitted"};
+ return parseStringAttribute("PAC_extension", tag, makeArrayRef(strings));
+}
+
+Error ARMAttributeParser::BTI_extension(ARMBuildAttrs::AttrType tag) {
+ static const char *strings[] = {"Not Permitted", "Permitted in NOP space",
+ "Permitted"};
+ return parseStringAttribute("BTI_extension", tag, makeArrayRef(strings));
+}
+
+Error ARMAttributeParser::PACRET_use(ARMBuildAttrs::AttrType tag) {
+ static const char *strings[] = {"Not Used", "Used"};
+ return parseStringAttribute("PACRET_use", tag, makeArrayRef(strings));
+}
+
+Error ARMAttributeParser::BTI_use(ARMBuildAttrs::AttrType tag) {
+ static const char *strings[] = {"Not Used", "Used"};
+ return parseStringAttribute("BTI_use", tag, makeArrayRef(strings));
+}
+
Error ARMAttributeParser::nodefaults(AttrType tag) {
uint64_t value = de.getULEB128(cursor);
printAttribute(tag, value, "Unspecified Tags UNDEFINED");
diff --git a/contrib/llvm-project/llvm/lib/Support/ARMBuildAttrs.cpp b/contrib/llvm-project/llvm/lib/Support/ARMBuildAttrs.cpp
index f20521f2a2d4..815cfc62a4b0 100644
--- a/contrib/llvm-project/llvm/lib/Support/ARMBuildAttrs.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/ARMBuildAttrs.cpp
@@ -50,6 +50,10 @@ static const TagNameItem tagData[] = {
{ARMBuildAttrs::MPextension_use, "Tag_MPextension_use"},
{ARMBuildAttrs::DIV_use, "Tag_DIV_use"},
{ARMBuildAttrs::DSP_extension, "Tag_DSP_extension"},
+ {ARMBuildAttrs::PAC_extension, "Tag_PAC_extension"},
+ {ARMBuildAttrs::BTI_extension, "Tag_BTI_extension"},
+ {ARMBuildAttrs::BTI_use, "Tag_BTI_use"},
+ {ARMBuildAttrs::PACRET_use, "Tag_PACRET_use"},
{ARMBuildAttrs::nodefaults, "Tag_nodefaults"},
{ARMBuildAttrs::also_compatible_with, "Tag_also_compatible_with"},
{ARMBuildAttrs::T2EE_use, "Tag_T2EE_use"},
diff --git a/contrib/llvm-project/llvm/lib/Support/CommandLine.cpp b/contrib/llvm-project/llvm/lib/Support/CommandLine.cpp
index e64934aa90cc..5b7004c86f5a 100644
--- a/contrib/llvm-project/llvm/lib/Support/CommandLine.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/CommandLine.cpp
@@ -2656,10 +2656,13 @@ cl::getRegisteredSubcommands() {
void cl::HideUnrelatedOptions(cl::OptionCategory &Category, SubCommand &Sub) {
initCommonOptions();
for (auto &I : Sub.OptionsMap) {
+ bool Unrelated = true;
for (auto &Cat : I.second->Categories) {
- if (Cat != &Category && Cat != &CommonOptions->GenericCategory)
- I.second->setHiddenFlag(cl::ReallyHidden);
+ if (Cat == &Category || Cat == &CommonOptions->GenericCategory)
+ Unrelated = false;
}
+ if (Unrelated)
+ I.second->setHiddenFlag(cl::ReallyHidden);
}
}
@@ -2667,11 +2670,14 @@ void cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *> Categories,
SubCommand &Sub) {
initCommonOptions();
for (auto &I : Sub.OptionsMap) {
+ bool Unrelated = true;
for (auto &Cat : I.second->Categories) {
- if (!is_contained(Categories, Cat) &&
- Cat != &CommonOptions->GenericCategory)
- I.second->setHiddenFlag(cl::ReallyHidden);
+ if (is_contained(Categories, Cat) ||
+ Cat == &CommonOptions->GenericCategory)
+ Unrelated = false;
}
+ if (Unrelated)
+ I.second->setHiddenFlag(cl::ReallyHidden);
}
}
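
The two HideUnrelatedOptions hunks above change the predicate: an option is now hidden only when none of its categories is among the kept ones, whereas the old loops hid it as soon as any single category fell outside the kept set, wrongly hiding multi-category options. A small standalone model of the new check, with plain ints standing in for cl::OptionCategory pointers (a sketch, not the cl:: API itself):

#include <algorithm>
#include <vector>

bool isUnrelated(const std::vector<int> &OptionCats, const std::vector<int> &Kept) {
  // Unrelated only if no category of the option is in the kept set.
  return std::none_of(OptionCats.begin(), OptionCats.end(), [&](int C) {
    return std::find(Kept.begin(), Kept.end(), C) != Kept.end();
  });
}
// isUnrelated({1, 2}, {2}) == false, so an option in categories {1, 2} stays
// visible when category 2 is kept; the old logic would have hidden it.
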
diff --git a/contrib/llvm-project/llvm/lib/Support/HTTPClient.cpp b/contrib/llvm-project/llvm/lib/Support/HTTPClient.cpp
new file mode 100644
index 000000000000..68ba56d1fe50
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Support/HTTPClient.cpp
@@ -0,0 +1,97 @@
+//===-- llvm/Support/HTTPClient.cpp - HTTP client library -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+///
+/// This file defines the methods of the HTTPRequest, HTTPClient, and
+/// BufferedHTTPResponseHandler classes.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/HTTPClient.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using namespace llvm;
+
+HTTPRequest::HTTPRequest(StringRef Url) { this->Url = Url.str(); }
+
+bool operator==(const HTTPRequest &A, const HTTPRequest &B) {
+ return A.Url == B.Url && A.Method == B.Method &&
+ A.FollowRedirects == B.FollowRedirects;
+}
+
+HTTPResponseHandler::~HTTPResponseHandler() = default;
+
+static inline bool parseContentLengthHeader(StringRef LineRef,
+ size_t &ContentLength) {
+ // Content-Length is a mandatory header, and the only one we handle.
+ return LineRef.consume_front("Content-Length: ") &&
+ to_integer(LineRef.trim(), ContentLength, 10);
+}
+
+Error BufferedHTTPResponseHandler::handleHeaderLine(StringRef HeaderLine) {
+ if (ResponseBuffer.Body)
+ return Error::success();
+
+ size_t ContentLength;
+ if (parseContentLengthHeader(HeaderLine, ContentLength))
+ ResponseBuffer.Body =
+ WritableMemoryBuffer::getNewUninitMemBuffer(ContentLength);
+
+ return Error::success();
+}
+
+Error BufferedHTTPResponseHandler::handleBodyChunk(StringRef BodyChunk) {
+ if (!ResponseBuffer.Body)
+ return createStringError(errc::io_error,
+ "Unallocated response buffer. HTTP Body data "
+ "received before Content-Length header.");
+ if (Offset + BodyChunk.size() > ResponseBuffer.Body->getBufferSize())
+ return createStringError(errc::io_error,
+ "Content size exceeds buffer size.");
+ memcpy(ResponseBuffer.Body->getBufferStart() + Offset, BodyChunk.data(),
+ BodyChunk.size());
+ Offset += BodyChunk.size();
+ return Error::success();
+}
+
+Error BufferedHTTPResponseHandler::handleStatusCode(unsigned Code) {
+ ResponseBuffer.Code = Code;
+ return Error::success();
+}
+
+Expected<HTTPResponseBuffer> HTTPClient::perform(const HTTPRequest &Request) {
+ BufferedHTTPResponseHandler Handler;
+ if (Error Err = perform(Request, Handler))
+ return std::move(Err);
+ return std::move(Handler.ResponseBuffer);
+}
+
+Expected<HTTPResponseBuffer> HTTPClient::get(StringRef Url) {
+ HTTPRequest Request(Url);
+ return perform(Request);
+}
+
+HTTPClient::HTTPClient() = default;
+
+HTTPClient::~HTTPClient() = default;
+
+bool HTTPClient::isAvailable() { return false; }
+
+void HTTPClient::cleanup() {}
+
+void HTTPClient::setTimeout(std::chrono::milliseconds Timeout) {}
+
+Error HTTPClient::perform(const HTTPRequest &Request,
+ HTTPResponseHandler &Handler) {
+ llvm_unreachable("No HTTP Client implementation available.");
+}
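
The HTTPClient added above is only a stub in this file: isAvailable() reports false and perform() is unreachable, so a real backend is expected to be provided elsewhere and callers should gate on availability. A minimal calling sketch under that assumption (fetchUrl, the 5-second timeout, and treating isAvailable() as a static member are illustrative, not taken from the patch):

#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/HTTPClient.h"
#include <chrono>

using namespace llvm;

Expected<HTTPResponseBuffer> fetchUrl(StringRef Url) {
  if (!HTTPClient::isAvailable())
    return createStringError(errc::io_error, "no HTTP client backend available");
  HTTPClient Client;
  Client.setTimeout(std::chrono::milliseconds(5000));
  // On success the buffer carries the status code and, when the server sent
  // Content-Length, the body bytes collected by BufferedHTTPResponseHandler.
  return Client.get(Url);
}
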
diff --git a/contrib/llvm-project/llvm/lib/Support/KnownBits.cpp b/contrib/llvm-project/llvm/lib/Support/KnownBits.cpp
index 90483817c302..554e3248524c 100644
--- a/contrib/llvm-project/llvm/lib/Support/KnownBits.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/KnownBits.cpp
@@ -421,11 +421,10 @@ KnownBits KnownBits::mul(const KnownBits &LHS, const KnownBits &RHS,
"Self multiplication knownbits mismatch");
// Compute a conservative estimate for high known-0 bits.
- unsigned LeadZ =
- std::max(LHS.countMinLeadingZeros() + RHS.countMinLeadingZeros(),
- BitWidth) -
- BitWidth;
- LeadZ = std::min(LeadZ, BitWidth);
+ unsigned LHSLeadZ = LHS.countMinLeadingZeros();
+ unsigned RHSLeadZ = RHS.countMinLeadingZeros();
+ unsigned LeadZ = std::max(LHSLeadZ + RHSLeadZ, BitWidth) - BitWidth;
+ assert(LeadZ <= BitWidth && "More zeros than bits?");
// The result of the bottom bits of an integer multiply can be
// inferred by looking at the bottom bits of both operands and
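
A quick numeric check of the leading-zero bound computed in the hunk above (an illustration, not part of the patch): with BitWidth 8 and operands known to have at least 3 and 6 leading zeros, the factors are at most 31 and 3, so the product is at most 93 < 128 and keeps at least max(3 + 6, 8) - 8 = 1 leading zero.

#include <algorithm>

unsigned conservativeLeadZ(unsigned LHSLeadZ, unsigned RHSLeadZ, unsigned BitWidth) {
  return std::max(LHSLeadZ + RHSLeadZ, BitWidth) - BitWidth;
}
// conservativeLeadZ(3, 6, 8) == 1: factors <= 31 and <= 3, product <= 93 < 128,
// so the 8-bit product's top bit is indeed known zero.
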
diff --git a/contrib/llvm-project/llvm/lib/Support/Regex.cpp b/contrib/llvm-project/llvm/lib/Support/Regex.cpp
index 0d5cc1c00db1..7a804a1a2297 100644
--- a/contrib/llvm-project/llvm/lib/Support/Regex.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/Regex.cpp
@@ -218,10 +218,10 @@ bool Regex::isLiteralERE(StringRef Str) {
std::string Regex::escape(StringRef String) {
std::string RegexStr;
- for (unsigned i = 0, e = String.size(); i != e; ++i) {
- if (strchr(RegexMetachars, String[i]))
+ for (char C : String) {
+ if (strchr(RegexMetachars, C))
RegexStr += '\\';
- RegexStr += String[i];
+ RegexStr += C;
}
return RegexStr;
diff --git a/contrib/llvm-project/llvm/lib/Support/StringExtras.cpp b/contrib/llvm-project/llvm/lib/Support/StringExtras.cpp
index 8abf9f7ce0f1..5683d7005584 100644
--- a/contrib/llvm-project/llvm/lib/Support/StringExtras.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/StringExtras.cpp
@@ -60,8 +60,7 @@ void llvm::SplitString(StringRef Source,
}
void llvm::printEscapedString(StringRef Name, raw_ostream &Out) {
- for (unsigned i = 0, e = Name.size(); i != e; ++i) {
- unsigned char C = Name[i];
+ for (unsigned char C : Name) {
if (C == '\\')
Out << '\\' << C;
else if (isPrint(C) && C != '"')
diff --git a/contrib/llvm-project/llvm/lib/Support/StringRef.cpp b/contrib/llvm-project/llvm/lib/Support/StringRef.cpp
index c532a1abe906..652303fdb6a0 100644
--- a/contrib/llvm-project/llvm/lib/Support/StringRef.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/StringRef.cpp
@@ -227,8 +227,8 @@ size_t StringRef::rfind_insensitive(StringRef Str) const {
StringRef::size_type StringRef::find_first_of(StringRef Chars,
size_t From) const {
std::bitset<1 << CHAR_BIT> CharBits;
- for (size_type i = 0; i != Chars.size(); ++i)
- CharBits.set((unsigned char)Chars[i]);
+ for (char C : Chars)
+ CharBits.set((unsigned char)C);
for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
if (CharBits.test((unsigned char)Data[i]))
@@ -252,8 +252,8 @@ StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
size_t From) const {
std::bitset<1 << CHAR_BIT> CharBits;
- for (size_type i = 0; i != Chars.size(); ++i)
- CharBits.set((unsigned char)Chars[i]);
+ for (char C : Chars)
+ CharBits.set((unsigned char)C);
for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
if (!CharBits.test((unsigned char)Data[i]))
@@ -268,8 +268,8 @@ StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
StringRef::size_type StringRef::find_last_of(StringRef Chars,
size_t From) const {
std::bitset<1 << CHAR_BIT> CharBits;
- for (size_type i = 0; i != Chars.size(); ++i)
- CharBits.set((unsigned char)Chars[i]);
+ for (char C : Chars)
+ CharBits.set((unsigned char)C);
for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
if (CharBits.test((unsigned char)Data[i]))
@@ -293,8 +293,8 @@ StringRef::size_type StringRef::find_last_not_of(char C, size_t From) const {
StringRef::size_type StringRef::find_last_not_of(StringRef Chars,
size_t From) const {
std::bitset<1 << CHAR_BIT> CharBits;
- for (size_type i = 0, e = Chars.size(); i != e; ++i)
- CharBits.set((unsigned char)Chars[i]);
+ for (char C : Chars)
+ CharBits.set((unsigned char)C);
for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
if (!CharBits.test((unsigned char)Data[i]))
diff --git a/contrib/llvm-project/llvm/lib/Support/TargetParser.cpp b/contrib/llvm-project/llvm/lib/Support/TargetParser.cpp
index 1dadce4b9040..4acc23dd455b 100644
--- a/contrib/llvm-project/llvm/lib/Support/TargetParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/TargetParser.cpp
@@ -333,3 +333,51 @@ bool getCPUFeaturesExceptStdExt(CPUKind Kind,
} // namespace RISCV
} // namespace llvm
+
+// Parse a branch protection specification, which has the form
+// standard | none | [bti,pac-ret[+b-key,+leaf]*]
+// Returns true on success, with individual elements of the specification
+// returned in `PBP`. Returns false in error, with `Err` containing
+// an erroneous part of the spec.
+bool ARM::parseBranchProtection(StringRef Spec, ParsedBranchProtection &PBP,
+ StringRef &Err) {
+ PBP = {"none", "a_key", false};
+ if (Spec == "none")
+ return true; // defaults are ok
+
+ if (Spec == "standard") {
+ PBP.Scope = "non-leaf";
+ PBP.BranchTargetEnforcement = true;
+ return true;
+ }
+
+ SmallVector<StringRef, 4> Opts;
+ Spec.split(Opts, "+");
+ for (int I = 0, E = Opts.size(); I != E; ++I) {
+ StringRef Opt = Opts[I].trim();
+ if (Opt == "bti") {
+ PBP.BranchTargetEnforcement = true;
+ continue;
+ }
+ if (Opt == "pac-ret") {
+ PBP.Scope = "non-leaf";
+ for (; I + 1 != E; ++I) {
+ StringRef PACOpt = Opts[I + 1].trim();
+ if (PACOpt == "leaf")
+ PBP.Scope = "all";
+ else if (PACOpt == "b-key")
+ PBP.Key = "b_key";
+ else
+ break;
+ }
+ continue;
+ }
+ if (Opt == "")
+ Err = "<empty>";
+ else
+ Err = Opt;
+ return false;
+ }
+
+ return true;
+}
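
A minimal usage sketch for the parser moved above, assuming the ParsedBranchProtection declaration is reachable through the same ARM namespace (via llvm/Support/ARMTargetParser.h); the example() wrapper is illustrative:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ARMTargetParser.h"

using namespace llvm;

void example() {
  ARM::ParsedBranchProtection PBP;
  StringRef Err;
  if (ARM::parseBranchProtection("bti+pac-ret+leaf", PBP, Err)) {
    // PBP.BranchTargetEnforcement == true, PBP.Scope == "all",
    // PBP.Key stays "a_key" because no "+b-key" qualifier was given.
  }
}
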
diff --git a/contrib/llvm-project/llvm/lib/Support/ThreadPool.cpp b/contrib/llvm-project/llvm/lib/Support/ThreadPool.cpp
index 81926d8071b2..c11e16d3cf98 100644
--- a/contrib/llvm-project/llvm/lib/Support/ThreadPool.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/ThreadPool.cpp
@@ -29,7 +29,7 @@ ThreadPool::ThreadPool(ThreadPoolStrategy S)
Threads.emplace_back([S, ThreadID, this] {
S.apply_thread_strategy(ThreadID);
while (true) {
- PackagedTaskTy Task;
+ std::function<void()> Task;
{
std::unique_lock<std::mutex> LockGuard(QueueLock);
// Wait for tasks to be pushed in the queue
@@ -80,23 +80,6 @@ bool ThreadPool::isWorkerThread() const {
return false;
}
-std::shared_future<void> ThreadPool::asyncImpl(TaskTy Task) {
- /// Wrap the Task in a packaged_task to return a future object.
- PackagedTaskTy PackagedTask(std::move(Task));
- auto Future = PackagedTask.get_future();
- {
- // Lock the queue and push the new task
- std::unique_lock<std::mutex> LockGuard(QueueLock);
-
- // Don't allow enqueueing after disabling the pool
- assert(EnableFlag && "Queuing a thread during ThreadPool destruction");
-
- Tasks.push(std::move(PackagedTask));
- }
- QueueCondition.notify_one();
- return Future.share();
-}
-
// The destructor joins all threads, waiting for completion.
ThreadPool::~ThreadPool() {
{
@@ -128,16 +111,6 @@ void ThreadPool::wait() {
}
}
-std::shared_future<void> ThreadPool::asyncImpl(TaskTy Task) {
- // Get a Future with launch::deferred execution using std::async
- auto Future = std::async(std::launch::deferred, std::move(Task)).share();
- // Wrap the future so that both ThreadPool::wait() can operate and the
- // returned future can be sync'ed on.
- PackagedTaskTy PackagedTask([Future]() { Future.get(); });
- Tasks.push(std::move(PackagedTask));
- return Future;
-}
-
ThreadPool::~ThreadPool() { wait(); }
#endif
diff --git a/contrib/llvm-project/llvm/lib/TableGen/TGLexer.cpp b/contrib/llvm-project/llvm/lib/TableGen/TGLexer.cpp
index 2acac63ce843..25079fe33edb 100644
--- a/contrib/llvm-project/llvm/lib/TableGen/TGLexer.cpp
+++ b/contrib/llvm-project/llvm/lib/TableGen/TGLexer.cpp
@@ -1017,12 +1017,10 @@ void TGLexer::prepSkipToLineEnd() {
}
bool TGLexer::prepIsProcessingEnabled() {
- for (auto I = PrepIncludeStack.back()->rbegin(),
- E = PrepIncludeStack.back()->rend();
- I != E; ++I) {
- if (!I->IsDefined)
+ for (const PreprocessorControlDesc &I :
+ llvm::reverse(*PrepIncludeStack.back()))
+ if (!I.IsDefined)
return false;
- }
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 9f527a17d390..aeebb49675b2 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -818,18 +818,9 @@ void AArch64AsmPrinter::emitJumpTableInfo() {
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
if (JT.empty()) return;
- const Function &F = MF->getFunction();
const TargetLoweringObjectFile &TLOF = getObjFileLowering();
- bool JTInDiffSection =
- !STI->isTargetCOFF() ||
- !TLOF.shouldPutJumpTableInFunctionSection(
- MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32,
- F);
- if (JTInDiffSection) {
- // Drop it in the readonly section.
- MCSection *ReadOnlySec = TLOF.getSectionForJumpTable(F, TM);
- OutStreamer->SwitchSection(ReadOnlySec);
- }
+ MCSection *ReadOnlySec = TLOF.getSectionForJumpTable(MF->getFunction(), TM);
+ OutStreamer->SwitchSection(ReadOnlySec);
auto AFI = MF->getInfo<AArch64FunctionInfo>();
for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp
index 533ab3b05de9..ff4a4dfc1b95 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp
@@ -88,12 +88,9 @@ MachineInstr *AArch64CondBrTuning::convertToFlagSetting(MachineInstr &MI,
// If this is already the flag setting version of the instruction (e.g., SUBS)
// just make sure the implicit-def of NZCV isn't marked dead.
if (IsFlagSetting) {
- for (unsigned I = MI.getNumExplicitOperands(), E = MI.getNumOperands();
- I != E; ++I) {
- MachineOperand &MO = MI.getOperand(I);
+ for (MachineOperand &MO : MI.implicit_operands())
if (MO.isReg() && MO.isDead() && MO.getReg() == AArch64::NZCV)
MO.setIsDead(false);
- }
return &MI;
}
bool Is64Bit;
@@ -104,8 +101,8 @@ MachineInstr *AArch64CondBrTuning::convertToFlagSetting(MachineInstr &MI,
MachineInstrBuilder MIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
TII->get(NewOpc), NewDestReg);
- for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
- MIB.add(MI.getOperand(I));
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
+ MIB.add(MO);
return MIB;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 4c04e04a7d3c..ee6e670fe3cd 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -102,9 +102,8 @@ INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
MachineInstrBuilder &DefMI) {
const MCInstrDesc &Desc = OldMI.getDesc();
- for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e;
- ++i) {
- const MachineOperand &MO = OldMI.getOperand(i);
+ for (const MachineOperand &MO :
+ llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) {
assert(MO.isReg() && MO.getReg());
if (MO.isUse())
UseMI.add(MO);
@@ -733,8 +732,9 @@ bool AArch64ExpandPseudo::expandCALL_RVMARKER(
MOP.getReg(), /*Def=*/false, /*Implicit=*/true));
RegMaskStartIdx++;
}
- for (; RegMaskStartIdx < MI.getNumOperands(); ++RegMaskStartIdx)
- OriginalCall->addOperand(MI.getOperand(RegMaskStartIdx));
+ for (const MachineOperand &MO :
+ llvm::drop_begin(MI.operands(), RegMaskStartIdx))
+ OriginalCall->addOperand(MO);
auto *Marker = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
.addReg(AArch64::FP, RegState::Define)
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 6e9e61c8e7ac..72461aa1f772 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -890,7 +890,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::ABS);
setTargetDAGCombine(ISD::SUB);
- setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::XOR);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
@@ -930,6 +929,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::VECREDUCE_ADD);
setTargetDAGCombine(ISD::STEP_VECTOR);
+ setTargetDAGCombine(ISD::FP_EXTEND);
+
setTargetDAGCombine(ISD::GlobalAddress);
// In case of strict alignment, avoid an excessive number of byte wide stores.
@@ -1323,6 +1324,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
}
setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom);
@@ -1504,6 +1506,24 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
}
}
+bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
+ EVT OpVT) const {
+ // Only SVE has a 1:1 mapping from intrinsic -> instruction (whilelo).
+ if (!Subtarget->hasSVE())
+ return true;
+
+ // We can only support legal predicate result types.
+ if (ResVT != MVT::nxv2i1 && ResVT != MVT::nxv4i1 && ResVT != MVT::nxv8i1 &&
+ ResVT != MVT::nxv16i1)
+ return true;
+
+ // The whilelo instruction only works with i32 or i64 scalar inputs.
+ if (OpVT != MVT::i32 && OpVT != MVT::i64)
+ return true;
+
+ return false;
+}
+
void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
@@ -1528,7 +1548,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
setCondCodeAction(ISD::SETUNE, VT, Expand);
}
- // Mark integer truncating stores as having custom lowering
+ // Mark integer truncating stores/extending loads as having custom lowering
if (VT.isInteger()) {
MVT InnerVT = VT.changeVectorElementType(MVT::i8);
while (InnerVT != VT) {
@@ -1540,6 +1560,18 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
}
}
+ // Mark floating-point truncating stores/extending loads as having custom
+ // lowering
+ if (VT.isFloatingPoint()) {
+ MVT InnerVT = VT.changeVectorElementType(MVT::f16);
+ while (InnerVT != VT) {
+ setTruncStoreAction(VT, InnerVT, Custom);
+ setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Custom);
+ InnerVT = InnerVT.changeVectorElementType(
+ MVT::getFloatingPointVT(2 * InnerVT.getScalarSizeInBits()));
+ }
+ }
+
// Lower fixed length vector operations to scalable equivalents.
setOperationAction(ISD::ABS, VT, Custom);
setOperationAction(ISD::ADD, VT, Custom);
@@ -1950,6 +1982,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::UDIV_PRED)
MAKE_CASE(AArch64ISD::UMAX_PRED)
MAKE_CASE(AArch64ISD::UMIN_PRED)
+ MAKE_CASE(AArch64ISD::SRAD_MERGE_OP1)
MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU)
@@ -2316,6 +2349,8 @@ static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);
static SDValue convertFixedMaskToScalableVector(SDValue Mask,
SelectionDAG &DAG);
+static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL,
+ EVT VT);
/// isZerosVector - Check whether SDNode N is a zero-filled vector.
static bool isZerosVector(const SDNode *N) {
@@ -4288,6 +4323,12 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
}
+ case Intrinsic::get_active_lane_mask: {
+ SDValue ID =
+ DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, dl, MVT::i64);
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(), ID,
+ Op.getOperand(1), Op.getOperand(2));
+ }
}
}
@@ -4506,7 +4547,7 @@ SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
}
InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
Mask = DAG.getNode(
- ISD::ZERO_EXTEND, DL,
+ ISD::SIGN_EXTEND, DL,
VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
}
@@ -4618,7 +4659,7 @@ SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
VT.changeVectorElementType(IndexVT.getVectorElementType()), StoreVal);
StoreVal = convertToScalableVector(DAG, IndexVT, StoreVal);
Mask = DAG.getNode(
- ISD::ZERO_EXTEND, DL,
+ ISD::SIGN_EXTEND, DL,
VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
} else if (VT.isFloatingPoint()) {
// Handle FP data by casting the data so an integer scatter can be used.
@@ -10963,8 +11004,40 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
return SDValue();
}
+static bool isPow2Splat(SDValue Op, uint64_t &SplatVal, bool &Negated) {
+ if (Op.getOpcode() != AArch64ISD::DUP &&
+ Op.getOpcode() != ISD::SPLAT_VECTOR &&
+ Op.getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ if (Op.getOpcode() == ISD::BUILD_VECTOR &&
+ !isAllConstantBuildVector(Op, SplatVal))
+ return false;
+
+ if (Op.getOpcode() != ISD::BUILD_VECTOR &&
+ !isa<ConstantSDNode>(Op->getOperand(0)))
+ return false;
+
+ SplatVal = Op->getConstantOperandVal(0);
+ if (Op.getValueType().getVectorElementType() != MVT::i64)
+ SplatVal = (int32_t)SplatVal;
+
+ Negated = false;
+ if (isPowerOf2_64(SplatVal))
+ return true;
+
+ Negated = true;
+ if (isPowerOf2_64(-SplatVal)) {
+ SplatVal = -SplatVal;
+ return true;
+ }
+
+ return false;
+}
+
SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
+ SDLoc dl(Op);
if (useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
return LowerFixedLengthVectorIntDivideToSVE(Op, DAG);
@@ -10974,6 +11047,19 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
bool Signed = Op.getOpcode() == ISD::SDIV;
unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
+ bool Negated;
+ uint64_t SplatVal;
+ if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated)) {
+ SDValue Pg = getPredicateForScalableVector(DAG, dl, VT);
+ SDValue Res =
+ DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, dl, VT, Pg, Op->getOperand(0),
+ DAG.getTargetConstant(Log2_64(SplatVal), dl, MVT::i32));
+ if (Negated)
+ Res = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Res);
+
+ return Res;
+ }
+
if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64)
return LowerToPredicatedOp(Op, DAG, PredOpcode);
@@ -10987,7 +11073,6 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
else
llvm_unreachable("Unexpected Custom DIV operation");
- SDLoc dl(Op);
unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
SDValue Op0Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(0));
@@ -11924,6 +12009,12 @@ static bool areOperandsOfVmullHighP64(Value *Op1, Value *Op2) {
return isOperandOfVmullHighP64(Op1) && isOperandOfVmullHighP64(Op2);
}
+static bool isSplatShuffle(Value *V) {
+ if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V))
+ return is_splat(Shuf->getShuffleMask());
+ return false;
+}
+
/// Check if sinking \p I's operands to I's basic block is profitable, because
/// the operands can be folded into a target instruction, e.g.
/// shufflevectors extracts and/or sext/zext can be folded into (u,s)subl(2).
@@ -11934,12 +12025,24 @@ bool AArch64TargetLowering::shouldSinkOperands(
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
+ case Intrinsic::aarch64_neon_smull:
case Intrinsic::aarch64_neon_umull:
- if (!areExtractShuffleVectors(II->getOperand(0), II->getOperand(1)))
- return false;
- Ops.push_back(&II->getOperandUse(0));
- Ops.push_back(&II->getOperandUse(1));
- return true;
+ if (areExtractShuffleVectors(II->getOperand(0), II->getOperand(1))) {
+ Ops.push_back(&II->getOperandUse(0));
+ Ops.push_back(&II->getOperandUse(1));
+ return true;
+ }
+ LLVM_FALLTHROUGH;
+
+ case Intrinsic::aarch64_neon_sqdmull:
+ case Intrinsic::aarch64_neon_sqdmulh:
+ case Intrinsic::aarch64_neon_sqrdmulh:
+ // Sink splats for index lane variants
+ if (isSplatShuffle(II->getOperand(0)))
+ Ops.push_back(&II->getOperandUse(0));
+ if (isSplatShuffle(II->getOperand(1)))
+ Ops.push_back(&II->getOperandUse(1));
+ return !Ops.empty();
case Intrinsic::aarch64_neon_pmull64:
if (!areOperandsOfVmullHighP64(II->getArgOperand(0),
@@ -12961,8 +13064,14 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
if (isIntDivCheap(N->getValueType(0), Attr))
return SDValue(N,0); // Lower SDIV as SDIV
- // fold (sdiv X, pow2)
EVT VT = N->getValueType(0);
+
+ // For scalable and fixed types, mark them as cheap so we can handle it much
+ // later. This allows us to handle larger than legal types.
+ if (VT.isScalableVector() || Subtarget->useSVEForFixedLengthVectors())
+ return SDValue(N, 0);
+
+ // fold (sdiv X, pow2)
if ((VT != MVT::i32 && VT != MVT::i64) ||
!(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
return SDValue();
@@ -13858,34 +13967,6 @@ static SDValue performANDCombine(SDNode *N,
return SDValue();
}
-static SDValue performSRLCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
- SelectionDAG &DAG = DCI.DAG;
- EVT VT = N->getValueType(0);
- if (VT != MVT::i32 && VT != MVT::i64)
- return SDValue();
-
- // Canonicalize (srl (bswap i32 x), 16) to (rotr (bswap i32 x), 16), if the
- // high 16-bits of x are zero. Similarly, canonicalize (srl (bswap i64 x), 32)
- // to (rotr (bswap i64 x), 32), if the high 32-bits of x are zero.
- SDValue N0 = N->getOperand(0);
- if (N0.getOpcode() == ISD::BSWAP) {
- SDLoc DL(N);
- SDValue N1 = N->getOperand(1);
- SDValue N00 = N0.getOperand(0);
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
- uint64_t ShiftAmt = C->getZExtValue();
- if (VT == MVT::i32 && ShiftAmt == 16 &&
- DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(32, 16)))
- return DAG.getNode(ISD::ROTR, DL, VT, N0, N1);
- if (VT == MVT::i64 && ShiftAmt == 32 &&
- DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(64, 32)))
- return DAG.getNode(ISD::ROTR, DL, VT, N0, N1);
- }
- }
- return SDValue();
-}
-
// Attempt to form urhadd(OpA, OpB) from
// truncate(vlshr(sub(zext(OpB), xor(zext(OpA), Ones(ElemSizeInBits))), 1))
// or uhadd(OpA, OpB) from truncate(vlshr(add(zext(OpA), zext(OpB)), 1)).
@@ -14031,6 +14112,9 @@ static SDValue performConcatVectorsCombine(SDNode *N,
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
unsigned N0Opc = N0->getOpcode(), N1Opc = N1->getOpcode();
+ if (VT.isScalableVector())
+ return SDValue();
+
// Optimize concat_vectors of truncated vectors, where the intermediate
// type is illegal, to avoid said illegality, e.g.,
// (v4i16 (concat_vectors (v2i16 (truncate (v2i64))),
@@ -15089,6 +15173,9 @@ static SDValue performIntrinsicCombine(SDNode *N,
case Intrinsic::aarch64_sve_uqsub_x:
return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
+ case Intrinsic::aarch64_sve_asrd:
+ return DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2), N->getOperand(3));
case Intrinsic::aarch64_sve_cmphs:
if (!N->getOperand(2).getValueType().isFloatingPoint())
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
@@ -15883,6 +15970,22 @@ static SDValue performSTORECombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ SDValue Ptr = ST->getBasePtr();
+
+ // If this is an FP_ROUND followed by a store, fold this into a truncating
+ // store. We can do this even if this is already a truncstore.
+ // We purposefully don't care about legality of the nodes here as we know
+ // they can be split down into something legal.
+ if (DCI.isBeforeLegalizeOps() && Value.getOpcode() == ISD::FP_ROUND &&
+ Value.getNode()->hasOneUse() && ST->isUnindexed() &&
+ Subtarget->useSVEForFixedLengthVectors() &&
+ Value.getValueType().isFixedLengthVector())
+ return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
+ ST->getMemoryVT(), ST->getMemOperand());
+
if (SDValue Split = splitStores(N, DCI, DAG, Subtarget))
return Split;
@@ -17225,6 +17328,37 @@ SDValue performSVESpliceCombine(SDNode *N, SelectionDAG &DAG) {
return DAG.getBitcast(Ty, Trunc);
}
+SDValue performFPExtendCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *Subtarget) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
+ if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::FP_ROUND)
+ return SDValue();
+
+ // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
+ // We purposefully don't care about legality of the nodes here as we know
+ // they can be split down into something legal.
+ if (DCI.isBeforeLegalizeOps() && ISD::isNormalLoad(N0.getNode()) &&
+ N0.hasOneUse() && Subtarget->useSVEForFixedLengthVectors() &&
+ VT.isFixedLengthVector()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ N0.getValueType(), LN0->getMemOperand());
+ DCI.CombineTo(N, ExtLoad);
+ DCI.CombineTo(N0.getNode(),
+ DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(),
+ ExtLoad, DAG.getIntPtrConstant(1, SDLoc(N0))),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ return SDValue();
+}
+
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -17253,8 +17387,6 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performORCombine(N, DCI, Subtarget);
case ISD::AND:
return performANDCombine(N, DCI);
- case ISD::SRL:
- return performSRLCombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN:
return performIntrinsicCombine(N, DCI, Subtarget);
case ISD::ANY_EXTEND:
@@ -17283,6 +17415,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performSTORECombine(N, DCI, DAG, Subtarget);
case ISD::VECTOR_SPLICE:
return performSVESpliceCombine(N, DAG);
+ case ISD::FP_EXTEND:
+ return performFPExtendCombine(N, DAG, DCI, Subtarget);
case AArch64ISD::BRCOND:
return performBRCONDCombine(N, DCI, DAG);
case AArch64ISD::TBNZ:
@@ -18414,6 +18548,15 @@ bool AArch64TargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
return VT.isScalarInteger();
}
+bool AArch64TargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
+ EVT VT) const {
+ // v8f16 without fp16 need to be extended to v8f32, which is more difficult to
+ // legalize.
+ if (FPVT == MVT::v8f16 && !Subtarget->hasFullFP16())
+ return false;
+ return TargetLowering::shouldConvertFpToSat(Op, FPVT, VT);
+}
+
bool AArch64TargetLowering::enableAggressiveFMAFusion(EVT VT) const {
return Subtarget->hasAggressiveFMA() && VT.isFloatingPoint();
}
@@ -18591,12 +18734,29 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorLoadToSVE(
SDLoc DL(Op);
EVT VT = Op.getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
+ EVT LoadVT = ContainerVT;
+ EVT MemVT = Load->getMemoryVT();
+
+ auto Pg = getPredicateForFixedLengthVector(DAG, DL, VT);
+
+ if (VT.isFloatingPoint() && Load->getExtensionType() == ISD::EXTLOAD) {
+ LoadVT = ContainerVT.changeTypeToInteger();
+ MemVT = MemVT.changeTypeToInteger();
+ }
auto NewLoad = DAG.getMaskedLoad(
- ContainerVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(),
- getPredicateForFixedLengthVector(DAG, DL, VT), DAG.getUNDEF(ContainerVT),
- Load->getMemoryVT(), Load->getMemOperand(), Load->getAddressingMode(),
- Load->getExtensionType());
+ LoadVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(), Pg,
+ DAG.getUNDEF(LoadVT), MemVT, Load->getMemOperand(),
+ Load->getAddressingMode(), Load->getExtensionType());
+
+ if (VT.isFloatingPoint() && Load->getExtensionType() == ISD::EXTLOAD) {
+ EVT ExtendVT = ContainerVT.changeVectorElementType(
+ Load->getMemoryVT().getVectorElementType());
+
+ NewLoad = getSVESafeBitCast(ExtendVT, NewLoad, DAG);
+ NewLoad = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT,
+ Pg, NewLoad, DAG.getUNDEF(ContainerVT));
+ }
auto Result = convertFromScalableVector(DAG, VT, NewLoad);
SDValue MergedValues[2] = {Result, Load->getChain()};
@@ -18609,12 +18769,15 @@ static SDValue convertFixedMaskToScalableVector(SDValue Mask,
EVT InVT = Mask.getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
+ auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);
+
+ if (ISD::isBuildVectorAllOnes(Mask.getNode()))
+ return Pg;
+
auto Op1 = convertToScalableVector(DAG, ContainerVT, Mask);
auto Op2 = DAG.getConstant(0, DL, ContainerVT);
- auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);
- EVT CmpVT = Pg.getValueType();
- return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, CmpVT,
+ return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, Pg.getValueType(),
{Pg, Op1, Op2, DAG.getCondCode(ISD::SETNE)});
}
@@ -18668,13 +18831,26 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE(
SDLoc DL(Op);
EVT VT = Store->getValue().getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
+ EVT MemVT = Store->getMemoryVT();
+ auto Pg = getPredicateForFixedLengthVector(DAG, DL, VT);
auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
- return DAG.getMaskedStore(
- Store->getChain(), DL, NewValue, Store->getBasePtr(), Store->getOffset(),
- getPredicateForFixedLengthVector(DAG, DL, VT), Store->getMemoryVT(),
- Store->getMemOperand(), Store->getAddressingMode(),
- Store->isTruncatingStore());
+
+ if (VT.isFloatingPoint() && Store->isTruncatingStore()) {
+ EVT TruncVT = ContainerVT.changeVectorElementType(
+ Store->getMemoryVT().getVectorElementType());
+ MemVT = MemVT.changeTypeToInteger();
+ NewValue = DAG.getNode(AArch64ISD::FP_ROUND_MERGE_PASSTHRU, DL, TruncVT, Pg,
+ NewValue, DAG.getTargetConstant(0, DL, MVT::i64),
+ DAG.getUNDEF(TruncVT));
+ NewValue =
+ getSVESafeBitCast(ContainerVT.changeTypeToInteger(), NewValue, DAG);
+ }
+
+ return DAG.getMaskedStore(Store->getChain(), DL, NewValue,
+ Store->getBasePtr(), Store->getOffset(), Pg, MemVT,
+ Store->getMemOperand(), Store->getAddressingMode(),
+ Store->isTruncatingStore());
}
SDValue AArch64TargetLowering::LowerFixedLengthVectorMStoreToSVE(
@@ -18706,6 +18882,21 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
bool Signed = Op.getOpcode() == ISD::SDIV;
unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
+ bool Negated;
+ uint64_t SplatVal;
+ if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated)) {
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
+ SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
+ SDValue Op2 = DAG.getTargetConstant(Log2_64(SplatVal), dl, MVT::i32);
+
+ SDValue Pg = getPredicateForFixedLengthVector(DAG, dl, VT);
+ SDValue Res = DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, dl, ContainerVT, Pg, Op1, Op2);
+ if (Negated)
+ Res = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Res);
+
+ return convertFromScalableVector(DAG, VT, Res);
+ }
+
// Scalable vector i32/i64 DIV is supported.
if (EltVT == MVT::i32 || EltVT == MVT::i64)
return LowerToPredicatedOp(Op, DAG, PredOpcode, /*OverrideNEON=*/true);
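
The divide hunks above (LowerDIV and LowerFixedLengthVectorIntDivideToSVE) lower a signed divide by a power-of-two splat to the SVE asrd shift, negating the result when the divisor is negative. A scalar model of that lowering for a divisor of -8 (a sketch for illustration, not the SelectionDAG code; it relies on arithmetic right shift of signed values, as mainstream compilers provide):

#include <cstdint>

// asrd #3 behaves like a round-toward-zero shift, which the biased shift below
// reproduces for negative inputs; the final negation is the Negated path above.
int64_t sdivByNeg8(int64_t X) {
  int64_t Shifted = (X + ((X >> 63) & 7)) >> 3; // round-toward-zero shift by 3
  return -Shifted;
}
// sdivByNeg8(-20) == 2 and sdivByNeg8(20) == -2, matching C's truncating division.
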
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 392e22b68366..ea884cdccd28 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -104,6 +104,8 @@ enum NodeType : unsigned {
// Unpredicated vector instructions
BIC,
+ SRAD_MERGE_OP1,
+
// Predicated instructions with the result of inactive lanes provided by the
// last operand.
FABS_MERGE_PASSTHRU,
@@ -774,6 +776,8 @@ public:
bool preferIncOfAddToSubOfNot(EVT VT) const override;
+ bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
+
bool hasBitPreservingFPLogic(EVT VT) const override {
// FIXME: Is this always true? It should be true for vectors at least.
return VT == MVT::f32 || VT == MVT::f64;
@@ -842,6 +846,8 @@ public:
EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
bool AllowUnknown = false) const override;
+ bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override;
+
private:
/// Keep a pointer to the AArch64Subtarget around so that we can
/// make the right decision when generating code for different targets.
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index db8e0c5dac4a..decee117d2d5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -437,6 +437,18 @@ def non_temporal_store :
cast<MaskedStoreSDNode>(N)->isNonTemporal();
}]>;
+// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
+def top16Zero: PatLeaf<(i32 GPR32:$src), [{
+ return SDValue(N,0)->getValueType(0) == MVT::i32 &&
+ CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
+ }]>;
+
+// top32Zero - answer true if the upper 32 bits of $src are 0, false otherwise
+def top32Zero: PatLeaf<(i64 GPR64:$src), [{
+ return SDValue(N,0)->getValueType(0) == MVT::i64 &&
+ CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 32));
+ }]>;
+
// Node definitions.
def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
def AArch64adr : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;
@@ -2046,6 +2058,10 @@ def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>;
def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>;
def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;
+// Match (srl (bswap x), C) -> revC if the upper bswap bits are known zero.
+def : Pat<(srl (bswap top16Zero:$Rn), (i64 16)), (REV16Wr GPR32:$Rn)>;
+def : Pat<(srl (bswap top32Zero:$Rn), (i64 32)), (REV32Xr GPR64:$Rn)>;
+
//===----------------------------------------------------------------------===//
// Bitfield immediate extraction instruction.
//===----------------------------------------------------------------------===//
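The two new patterns above fold (srl (bswap x), C) into REV16/REV32 when the top C bits of x are known to be zero. A standalone check of that identity (not LLVM code; rev16/rev32 are hand-written models of the instructions, and __builtin_bswap32/64 are GCC/Clang builtins):

#include <cassert>
#include <cstdint>

// rev16: reverse the bytes inside each 16-bit half of a 32-bit value.
uint32_t rev16(uint32_t x) {
  return ((x & 0x00ff00ffu) << 8) | ((x & 0xff00ff00u) >> 8);
}

// rev32: reverse the bytes inside each 32-bit half of a 64-bit value.
uint64_t rev32(uint64_t x) {
  uint64_t hi = __builtin_bswap32(uint32_t(x >> 32));
  uint64_t lo = __builtin_bswap32(uint32_t(x));
  return (hi << 32) | lo;
}

int main() {
  uint32_t x32 = 0x0000ABCDu;           // upper 16 bits known zero
  assert((__builtin_bswap32(x32) >> 16) == rev16(x32));

  uint64_t x64 = 0x00000000AABBCCDDull; // upper 32 bits known zero
  assert((__builtin_bswap64(x64) >> 32) == rev32(x64));
  return 0;
}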
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 67d8fbb45cf5..25d53f4ab065 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -199,6 +199,13 @@ def AArch64umax_p : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>;
def AArch64umin_p : SDNode<"AArch64ISD::UMIN_PRED", SDT_AArch64Arith>;
def AArch64umulh_p : SDNode<"AArch64ISD::MULHU_PRED", SDT_AArch64Arith>;
+def SDT_AArch64Arith_Imm : SDTypeProfile<1, 3, [
+ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3,i32>,
+ SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>
+]>;
+
+def AArch64asrd_m1 : SDNode<"AArch64ISD::SRAD_MERGE_OP1", SDT_AArch64Arith_Imm>;
+
def SDT_AArch64IntExtend : SDTypeProfile<1, 4, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3, OtherVT>, SDTCisVec<4>,
SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisVTSmallerThanOp<3, 2>, SDTCisSameAs<0,4>
@@ -1575,7 +1582,7 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right_dup<0b0000, "asr", "ASR_ZPZI", int_aarch64_sve_asr>;
defm LSR_ZPmI : sve_int_bin_pred_shift_imm_right_dup<0b0001, "lsr", "LSR_ZPZI", int_aarch64_sve_lsr>;
defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left_dup< 0b0011, "lsl", "LSL_ZPZI", int_aarch64_sve_lsl>;
- defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right< 0b0100, "asrd", "ASRD_ZPZI", int_aarch64_sve_asrd>;
+ defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right< 0b0100, "asrd", "ASRD_ZPZI", AArch64asrd_m1>;
defm ASR_ZPZI : sve_int_shift_pred_bhsd<AArch64asr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
defm LSR_ZPZI : sve_int_shift_pred_bhsd<AArch64lsr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
@@ -1586,7 +1593,7 @@ let Predicates = [HasSVEorStreamingSVE, UseExperimentalZeroingPseudos] in {
defm ASR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_asr>;
defm LSR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsr>;
defm LSL_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsl>;
- defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_asrd>;
+ defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<AArch64asrd_m1>;
} // End HasSVEorStreamingSVE, UseExperimentalZeroingPseudos
let Predicates = [HasSVEorStreamingSVE] in {
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 63d6fa5bbb26..34015d2dbd49 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -833,17 +833,12 @@ static Optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
return match(SplatValue, m_FPOne()) || match(SplatValue, m_One());
};
- // The OpMultiplier variable should always point to the dup (if any), so
- // swap if necessary.
- if (IsUnitDup(OpMultiplicand) || IsUnitSplat(OpMultiplicand))
- std::swap(OpMultiplier, OpMultiplicand);
-
if (IsUnitSplat(OpMultiplier)) {
- // [f]mul pg (dupx 1) %n => %n
+ // [f]mul pg %n, (dupx 1) => %n
OpMultiplicand->takeName(&II);
return IC.replaceInstUsesWith(II, OpMultiplicand);
} else if (IsUnitDup(OpMultiplier)) {
- // [f]mul pg (dup pg 1) %n => %n
+ // [f]mul pg %n, (dup pg 1) => %n
auto *DupInst = cast<IntrinsicInst>(OpMultiplier);
auto *DupPg = DupInst->getOperand(1);
// TODO: this is naive. The optimization is still valid if DupPg
@@ -2142,6 +2137,7 @@ bool AArch64TTIImpl::isLegalToVectorizeReduction(
case RecurKind::FMax:
case RecurKind::SelectICmp:
case RecurKind::SelectFCmp:
+ case RecurKind::FMulAdd:
return true;
default:
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index e090d87d59a2..3d9a626d3ac3 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -1920,35 +1920,6 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
MachineRegisterInfo &MRI = MF.getRegInfo();
switch (I.getOpcode()) {
- case TargetOpcode::G_SHL:
- case TargetOpcode::G_ASHR:
- case TargetOpcode::G_LSHR: {
- // These shifts are legalized to have 64 bit shift amounts because we want
- // to take advantage of the existing imported selection patterns that assume
- // the immediates are s64s. However, if the shifted type is 32 bits and for
- // some reason we receive input GMIR that has an s64 shift amount that's not
- // a G_CONSTANT, insert a truncate so that we can still select the s32
- // register-register variant.
- Register SrcReg = I.getOperand(1).getReg();
- Register ShiftReg = I.getOperand(2).getReg();
- const LLT ShiftTy = MRI.getType(ShiftReg);
- const LLT SrcTy = MRI.getType(SrcReg);
- if (SrcTy.isVector())
- return false;
- assert(!ShiftTy.isVector() && "unexpected vector shift ty");
- if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
- return false;
- auto *AmtMI = MRI.getVRegDef(ShiftReg);
- assert(AmtMI && "could not find a vreg definition for shift amount");
- if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
- // Insert a subregister copy to implement a 64->32 trunc
- auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
- .addReg(ShiftReg, 0, AArch64::sub_32);
- MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
- I.getOperand(2).setReg(Trunc.getReg(0));
- }
- return true;
- }
case TargetOpcode::G_STORE: {
bool Changed = contractCrossBankCopyIntoStore(I, MRI);
MachineOperand &SrcOp = I.getOperand(0);
@@ -2950,6 +2921,28 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
if (Opcode == TargetOpcode::G_SHL &&
MRI.getType(I.getOperand(0).getReg()).isVector())
return selectVectorSHL(I, MRI);
+
+ // These shifts were legalized to have 64 bit shift amounts because we
+ // want to take advantage of the selection patterns that assume the
+ // immediates are s64s, however, selectBinaryOp will assume both operands
+ // will have the same bit size.
+ {
+ Register SrcReg = I.getOperand(1).getReg();
+ Register ShiftReg = I.getOperand(2).getReg();
+ const LLT ShiftTy = MRI.getType(ShiftReg);
+ const LLT SrcTy = MRI.getType(SrcReg);
+ if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
+ ShiftTy.getSizeInBits() == 64) {
+ assert(!ShiftTy.isVector() && "unexpected vector shift ty");
+ assert(MRI.getVRegDef(ShiftReg) &&
+ "could not find a vreg definition for shift amount");
+ // Insert a subregister copy to implement a 64->32 trunc
+ auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
+ .addReg(ShiftReg, 0, AArch64::sub_32);
+ MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
+ I.getOperand(2).setReg(Trunc.getReg(0));
+ }
+ }
LLVM_FALLTHROUGH;
case TargetOpcode::G_FADD:
case TargetOpcode::G_FSUB:
@@ -6452,8 +6445,7 @@ static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder MIB(MI);
// Go through each operand and ensure it has the same regbank.
- for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
- MachineOperand &MO = MI.getOperand(OpIdx);
+ for (MachineOperand &MO : llvm::drop_begin(MI.operands())) {
if (!MO.isReg())
continue;
Register OpReg = MO.getReg();
@@ -6511,8 +6503,7 @@ void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
// %endbb:
// %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
bool HasGPROp = false, HasFPROp = false;
- for (unsigned OpIdx = 1; OpIdx < MI->getNumOperands(); ++OpIdx) {
- const auto &MO = MI->getOperand(OpIdx);
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
if (!MO.isReg())
continue;
const LLT &Ty = MRI.getType(MO.getReg());
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index f2a470857d21..78c0e90b1384 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -177,8 +177,8 @@ public:
// We can't just use EmitIntValue here, as that will emit a data mapping
// symbol, and swap the endianness on big-endian systems (instructions are
// always little-endian).
- for (unsigned I = 0; I < 4; ++I) {
- Buffer[I] = uint8_t(Inst);
+ for (char &C : Buffer) {
+ C = uint8_t(Inst);
Inst >>= 8;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
index cf1a60643efd..92552c3d41d5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -101,8 +101,8 @@ void AArch64TargetStreamer::emitInst(uint32_t Inst) {
// We can't just use EmitIntValue here, as that will swap the
// endianness on big-endian systems (instructions are always
// little-endian).
- for (unsigned I = 0; I < 4; ++I) {
- Buffer[I] = uint8_t(Inst);
+ for (char &C : Buffer) {
+ C = uint8_t(Inst);
Inst >>= 8;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index df2f9a0fa3a9..c7c5ff7bcbe7 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -26,6 +26,14 @@ def uchar_to_float : GICombineRule<
[{ return PostLegalizerHelper.matchUCharToFloat(*${itofp}); }]),
(apply [{ PostLegalizerHelper.applyUCharToFloat(*${itofp}); }])>;
+
+def rcp_sqrt_to_rsq : GICombineRule<
+ (defs root:$rcp, build_fn_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_INTRINSIC, G_FSQRT):$rcp,
+ [{ return PostLegalizerHelper.matchRcpSqrtToRsq(*${rcp}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${rcp}, ${matchinfo}); }])>;
+
+
def cvt_f32_ubyteN_matchdata : GIDefMatchData<"AMDGPUPostLegalizerCombinerHelper::CvtF32UByteMatchInfo">;
def cvt_f32_ubyteN : GICombineRule<
@@ -86,7 +94,8 @@ def AMDGPUPreLegalizerCombinerHelper: GICombinerHelper<
def AMDGPUPostLegalizerCombinerHelper: GICombinerHelper<
"AMDGPUGenPostLegalizerCombinerHelper",
[all_combines, gfx6gfx7_combines,
- uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize, foldable_fneg]> {
+ uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize, foldable_fneg,
+ rcp_sqrt_to_rsq]> {
let DisableRuleOption = "amdgpupostlegalizercombiner-disable-rule";
let StateClass = "AMDGPUPostLegalizerCombinerHelperState";
let AdditionalArguments = [];
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index cee56ee97294..8236e6672247 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -654,6 +654,9 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
SelectMAD_64_32(N);
return;
}
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI:
+ return SelectMUL_LOHI(N);
case ISD::CopyToReg: {
const SITargetLowering& Lowering =
*static_cast<const SITargetLowering*>(getTargetLowering());
@@ -719,6 +722,18 @@ bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
Term->getMetadata("structurizecfg.uniform");
}
+bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N,
+ unsigned ShAmtBits) const {
+ assert(N->getOpcode() == ISD::AND);
+
+ const APInt &RHS = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
+ if (RHS.countTrailingOnes() >= ShAmtBits)
+ return true;
+
+ const APInt &LHSKnownZeros = CurDAG->computeKnownBits(N->getOperand(0)).Zero;
+ return (LHSKnownZeros | RHS).countTrailingOnes() >= ShAmtBits;
+}
+
static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr,
SDValue &N0, SDValue &N1) {
if (Addr.getValueType() == MVT::i64 && Addr.getOpcode() == ISD::BITCAST &&
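isUnneededShiftMask above decides whether an AND that masks a shift amount can be ignored: the hardware shifter only reads the low ShAmtBits bits of the amount, so the mask is redundant whenever it (together with bits already known to be zero in the amount) leaves those low bits untouched. A small sketch of the same condition (not LLVM code; __builtin_ctz is a GCC/Clang builtin):

#include <cassert>
#include <cstdint>

// Number of consecutive one bits starting from bit 0.
unsigned countTrailingOnes(uint32_t v) {
  return v == ~0u ? 32 : __builtin_ctz(~v);
}

// The AND can be dropped when it cannot change the low shAmtBits bits of the
// shift amount, mirroring the two checks in isUnneededShiftMask.
bool maskIsUnneeded(uint32_t mask, uint32_t knownZeroOfAmt, unsigned shAmtBits) {
  return countTrailingOnes(mask | knownZeroOfAmt) >= shAmtBits;
}

int main() {
  // A 32-bit shift reads only the low 5 bits of its amount.
  assert(maskIsUnneeded(/*mask=*/31, /*knownZeroOfAmt=*/0, /*shAmtBits=*/5));
  assert(!maskIsUnneeded(/*mask=*/15, /*knownZeroOfAmt=*/0, /*shAmtBits=*/5));
  // If bit 4 of the amount is already known zero, a mask of 15 is enough.
  assert(maskIsUnneeded(/*mask=*/15, /*knownZeroOfAmt=*/1u << 4, /*shAmtBits=*/5));
  return 0;
}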
@@ -1001,6 +1016,32 @@ void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}
+// We need to handle this here because tablegen doesn't support matching
+// instructions with multiple outputs.
+void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) {
+ SDLoc SL(N);
+ bool Signed = N->getOpcode() == ISD::SMUL_LOHI;
+ unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
+
+ SDValue Zero = CurDAG->getTargetConstant(0, SL, MVT::i64);
+ SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
+ SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Zero, Clamp};
+ SDNode *Mad = CurDAG->getMachineNode(Opc, SL, N->getVTList(), Ops);
+ if (!SDValue(N, 0).use_empty()) {
+ SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32);
+ SDNode *Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
+ MVT::i32, SDValue(Mad, 0), Sub0);
+ ReplaceUses(SDValue(N, 0), SDValue(Lo, 0));
+ }
+ if (!SDValue(N, 1).use_empty()) {
+ SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32);
+ SDNode *Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
+ MVT::i32, SDValue(Mad, 0), Sub1);
+ ReplaceUses(SDValue(N, 1), SDValue(Hi, 0));
+ }
+ CurDAG->RemoveDeadNode(N);
+}
+
bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset) const {
if (!isUInt<16>(Offset))
return false;
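SelectMUL_LOHI above produces both results of an S/UMUL_LOHI from a single V_MAD_[IU]64_[IU]32 with a zero addend and then extracts the sub0/sub1 halves of the 64-bit destination. A scalar sketch of that arithmetic (not LLVM code; unsigned case only):

#include <cassert>
#include <cstdint>

// One widening multiply-add with addend 0 yields both UMUL_LOHI results.
void mulLoHiU32(uint32_t a, uint32_t b, uint32_t &lo, uint32_t &hi) {
  uint64_t mad = uint64_t(a) * uint64_t(b) + 0; // V_MAD_U64_U32 with Zero
  lo = uint32_t(mad);                           // EXTRACT_SUBREG sub0
  hi = uint32_t(mad >> 32);                     // EXTRACT_SUBREG sub1
}

int main() {
  uint32_t lo, hi;
  mulLoHiU32(0xFFFFFFFFu, 0xFFFFFFFFu, lo, hi);
  assert(lo == 0x00000001u && hi == 0xFFFFFFFEu);
  return 0;
}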
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index c1d9673f067e..d638d9877a9b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -136,6 +136,10 @@ private:
bool isUniformLoad(const SDNode *N) const;
bool isUniformBr(const SDNode *N) const;
+ // Returns true if ISD::AND SDNode `N`'s masking of the shift amount operand's
+ // `ShAmtBits` bits is unneeded.
+ bool isUnneededShiftMask(const SDNode *N, unsigned ShAmtBits) const;
+
bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
SDValue &RHS) const;
@@ -231,6 +235,7 @@ private:
void SelectUADDO_USUBO(SDNode *N);
void SelectDIV_SCALE(SDNode *N);
void SelectMAD_64_32(SDNode *N);
+ void SelectMUL_LOHI(SDNode *N);
void SelectFMA_W_CHAIN(SDNode *N);
void SelectFMUL_W_CHAIN(SDNode *N);
SDNode *getBFE32(bool IsSigned, const SDLoc &DL, SDValue Val, uint32_t Offset,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 523fa2d3724b..54177564afbc 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -594,6 +594,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::MUL);
+ setTargetDAGCombine(ISD::SMUL_LOHI);
+ setTargetDAGCombine(ISD::UMUL_LOHI);
setTargetDAGCombine(ISD::MULHU);
setTargetDAGCombine(ISD::MULHS);
setTargetDAGCombine(ISD::SELECT);
@@ -3462,6 +3464,50 @@ SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
return DAG.getSExtOrTrunc(Mul, DL, VT);
}
+SDValue
+AMDGPUTargetLowering::performMulLoHiCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ if (N->getValueType(0) != MVT::i32)
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc DL(N);
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // SimplifyDemandedBits has the annoying habit of turning useful zero_extends
+ // in the source into any_extends if the result of the mul is truncated. Since
+ // we can assume the high bits are whatever we want, use the underlying value
+ // to avoid the unknown high bits from interfering.
+ if (N0.getOpcode() == ISD::ANY_EXTEND)
+ N0 = N0.getOperand(0);
+ if (N1.getOpcode() == ISD::ANY_EXTEND)
+ N1 = N1.getOperand(0);
+
+ // Try to use two fast 24-bit multiplies (one for each half of the result)
+ // instead of one slow extending multiply.
+ unsigned LoOpcode, HiOpcode;
+ if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
+ N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
+ N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
+ LoOpcode = AMDGPUISD::MUL_U24;
+ HiOpcode = AMDGPUISD::MULHI_U24;
+ } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
+ N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
+ N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
+ LoOpcode = AMDGPUISD::MUL_I24;
+ HiOpcode = AMDGPUISD::MULHI_I24;
+ } else {
+ return SDValue();
+ }
+
+ SDValue Lo = DAG.getNode(LoOpcode, DL, MVT::i32, N0, N1);
+ SDValue Hi = DAG.getNode(HiOpcode, DL, MVT::i32, N0, N1);
+ DCI.CombineTo(N, Lo, Hi);
+ return SDValue(N, 0);
+}
+
SDValue AMDGPUTargetLowering::performMulhsCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
EVT VT = N->getValueType(0);
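performMulLoHiCombine above rewrites a 32x32->64 multiply as a pair of 24-bit multiplies when both operands are known to fit in 24 bits, since the full product then fits in 48 bits. A scalar check of the unsigned case (not LLVM code; assumes MULHI_U24 returns bits [63:32] of the widened product):

#include <cassert>
#include <cstdint>

uint32_t mul_u24(uint32_t a, uint32_t b) {
  return uint32_t(uint64_t(a & 0xFFFFFF) * uint64_t(b & 0xFFFFFF));
}
uint32_t mulhi_u24(uint32_t a, uint32_t b) {
  return uint32_t((uint64_t(a & 0xFFFFFF) * uint64_t(b & 0xFFFFFF)) >> 32);
}

int main() {
  uint32_t a = 0x00ABCDEFu, b = 0x00123456u;      // both fit in 24 bits
  uint64_t ref = uint64_t(a) * uint64_t(b);       // what UMUL_LOHI computes
  assert(mul_u24(a, b) == uint32_t(ref));         // low half
  assert(mulhi_u24(a, b) == uint32_t(ref >> 32)); // high half
  return 0;
}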
@@ -4103,6 +4149,9 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
return performTruncateCombine(N, DCI);
case ISD::MUL:
return performMulCombine(N, DCI);
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI:
+ return performMulLoHiCombine(N, DCI);
case ISD::MULHS:
return performMulhsCombine(N, DCI);
case ISD::MULHU:
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 03632ac18598..daaca8737c5d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -91,6 +91,7 @@ protected:
SDValue performSrlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performTruncateCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performMulLoHiCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performMulhsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performMulhuCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond, SDValue LHS,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 28cb2fc57ac7..e16bead81b65 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -3880,6 +3880,22 @@ bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0,
return KnownBits->signBitIsZero(Base);
}
+bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI,
+ unsigned ShAmtBits) const {
+ assert(MI.getOpcode() == TargetOpcode::G_AND);
+
+ Optional<APInt> RHS = getIConstantVRegVal(MI.getOperand(2).getReg(), *MRI);
+ if (!RHS)
+ return false;
+
+ if (RHS->countTrailingOnes() >= ShAmtBits)
+ return true;
+
+ const APInt &LHSKnownZeros =
+ KnownBits->getKnownZeroes(MI.getOperand(1).getReg());
+ return (LHSKnownZeros | *RHS).countTrailingOnes() >= ShAmtBits;
+}
+
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffset(
MachineOperand &Root) const {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index b70e6883bae2..26996e42af53 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -300,6 +300,10 @@ private:
bool isInlineImmediate64(int64_t Imm) const;
bool isInlineImmediate(const APFloat &Imm) const;
+ // Returns true if TargetOpcode::G_AND MachineInstr `MI`'s masking of the
+ // shift amount operand's `ShAmtBits` bits is unneeded.
+ bool isUnneededShiftMask(const MachineInstr &MI, unsigned ShAmtBits) const;
+
const SIInstrInfo &TII;
const SIRegisterInfo &TRI;
const AMDGPURegisterBankInfo &RBI;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index bad9f6265b36..0528b552f475 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -242,25 +242,41 @@ def AMDGPUmul_i24_oneuse : HasOneUseBinOp<AMDGPUmul_i24>;
//===----------------------------------------------------------------------===//
// Constrained shift PatFrags.
+
+def csh_mask_16 : PatFrag<(ops node:$src0), (and node:$src0, imm),
+ [{ return isUnneededShiftMask(N, 4); }]> {
+ let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 4); }];
+ }
+
+def csh_mask_32 : PatFrag<(ops node:$src0), (and node:$src0, imm),
+ [{ return isUnneededShiftMask(N, 5); }]> {
+ let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 5); }];
+ }
+
+def csh_mask_64 : PatFrag<(ops node:$src0), (and node:$src0, imm),
+ [{ return isUnneededShiftMask(N, 6); }]> {
+ let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 6); }];
+ }
+
foreach width = [16, 32, 64] in {
-defvar mask = !sub(width, 1);
+defvar csh_mask = !cast<SDPatternOperator>("csh_mask_"#width);
def cshl_#width : PatFrags<(ops node:$src0, node:$src1),
- [(shl node:$src0, node:$src1), (shl node:$src0, (and node:$src1, mask))]>;
+ [(shl node:$src0, node:$src1), (shl node:$src0, (csh_mask node:$src1))]>;
defvar cshl = !cast<SDPatternOperator>("cshl_"#width);
def cshl_#width#_oneuse : HasOneUseBinOp<cshl>;
def clshl_rev_#width : PatFrag <(ops node:$src0, node:$src1),
(cshl $src1, $src0)>;
def csrl_#width : PatFrags<(ops node:$src0, node:$src1),
- [(srl node:$src0, node:$src1), (srl node:$src0, (and node:$src1, mask))]>;
+ [(srl node:$src0, node:$src1), (srl node:$src0, (csh_mask node:$src1))]>;
defvar csrl = !cast<SDPatternOperator>("csrl_"#width);
def csrl_#width#_oneuse : HasOneUseBinOp<csrl>;
def clshr_rev_#width : PatFrag <(ops node:$src0, node:$src1),
(csrl $src1, $src0)>;
def csra_#width : PatFrags<(ops node:$src0, node:$src1),
- [(sra node:$src0, node:$src1), (sra node:$src0, (and node:$src1, mask))]>;
+ [(sra node:$src0, node:$src1), (sra node:$src0, (csh_mask node:$src1))]>;
defvar csra = !cast<SDPatternOperator>("csra_"#width);
def csra_#width#_oneuse : HasOneUseBinOp<csra>;
def cashr_rev_#width : PatFrag <(ops node:$src0, node:$src1),
@@ -696,11 +712,6 @@ class RcpPat<Instruction RcpInst, ValueType vt> : AMDGPUPat <
(RcpInst $src)
>;
-class RsqPat<Instruction RsqInst, ValueType vt> : AMDGPUPat <
- (AMDGPUrcp (fsqrt vt:$src)),
- (RsqInst $src)
->;
-
// Instructions which select to the same v_min_f*
def fminnum_like : PatFrags<(ops node:$src0, node:$src1),
[(fminnum_ieee node:$src0, node:$src1),
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
index fc984d2dda64..1479933a2850 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"
@@ -58,6 +59,9 @@ public:
bool matchUCharToFloat(MachineInstr &MI);
void applyUCharToFloat(MachineInstr &MI);
+ bool matchRcpSqrtToRsq(MachineInstr &MI,
+ std::function<void(MachineIRBuilder &)> &MatchInfo);
+
// FIXME: Should be able to have 2 separate matchdatas rather than custom
// struct boilerplate.
struct CvtF32UByteMatchInfo {
@@ -203,6 +207,48 @@ void AMDGPUPostLegalizerCombinerHelper::applyUCharToFloat(MachineInstr &MI) {
MI.eraseFromParent();
}
+bool AMDGPUPostLegalizerCombinerHelper::matchRcpSqrtToRsq(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+
+ auto getRcpSrc = [=](const MachineInstr &MI) {
+ MachineInstr *ResMI = nullptr;
+ if (MI.getOpcode() == TargetOpcode::G_INTRINSIC &&
+ MI.getIntrinsicID() == Intrinsic::amdgcn_rcp)
+ ResMI = MRI.getVRegDef(MI.getOperand(2).getReg());
+
+ return ResMI;
+ };
+
+ auto getSqrtSrc = [=](const MachineInstr &MI) {
+ MachineInstr *SqrtSrcMI = nullptr;
+ mi_match(MI.getOperand(0).getReg(), MRI, m_GFSqrt(m_MInstr(SqrtSrcMI)));
+ return SqrtSrcMI;
+ };
+
+ MachineInstr *RcpSrcMI = nullptr, *SqrtSrcMI = nullptr;
+ // rcp(sqrt(x))
+ if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) {
+ MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) {
+ B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
+ .addUse(SqrtSrcMI->getOperand(0).getReg())
+ .setMIFlags(MI.getFlags());
+ };
+ return true;
+ }
+
+ // sqrt(rcp(x))
+ if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) {
+ MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) {
+ B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
+ .addUse(RcpSrcMI->getOperand(0).getReg())
+ .setMIFlags(MI.getFlags());
+ };
+ return true;
+ }
+
+ return false;
+}
+
bool AMDGPUPostLegalizerCombinerHelper::matchCvtF32UByteN(
MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) {
Register SrcReg = MI.getOperand(1).getReg();
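matchRcpSqrtToRsq above folds both amdgcn.rcp(sqrt(x)) and sqrt(amdgcn.rcp(x)) into a single amdgcn.rsq(x). For positive x the two shapes agree, since 1/sqrt(x) == sqrt(1/x); a quick numeric sanity check of that identity (not LLVM code, ignoring rounding and denormal corner cases):

#include <cassert>
#include <cmath>

int main() {
  for (double x : {0.25, 1.0, 2.0, 1e6}) {
    double viaRcpSqrt = 1.0 / std::sqrt(x); // rcp(sqrt(x))
    double viaSqrtRcp = std::sqrt(1.0 / x); // sqrt(rcp(x))
    assert(std::fabs(viaRcpSqrt - viaSqrtRcp) <= 1e-12 * viaRcpSqrt);
  }
  return 0;
}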
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
index d560d2043f42..7c4eb71882c7 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
@@ -280,10 +280,10 @@ bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) {
}
LLVM_DEBUG(dbgs() << "Printf format string in source = " << Str.str()
<< '\n');
- for (size_t I = 0; I < Str.size(); ++I) {
+ for (char C : Str) {
// Rest of the C escape sequences (e.g. \') are handled correctly
// by the MDParser
- switch (Str[I]) {
+ switch (C) {
case '\a':
Sizes << "\\a";
break;
@@ -308,7 +308,7 @@ bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) {
Sizes << "\\72";
break;
default:
- Sizes << Str[I];
+ Sizes << C;
break;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
index d7dc9ee4117b..12b5830ef930 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
@@ -45,6 +45,7 @@ public:
TRI(*MF.getSubtarget().getRegisterInfo()), Helper(Helper){};
bool isVgprRegBank(Register Reg);
+ Register getAsVgpr(Register Reg);
struct MinMaxMedOpc {
unsigned Min, Max, Med;
@@ -69,6 +70,23 @@ bool AMDGPURegBankCombinerHelper::isVgprRegBank(Register Reg) {
return RBI.getRegBank(Reg, MRI, TRI)->getID() == AMDGPU::VGPRRegBankID;
}
+Register AMDGPURegBankCombinerHelper::getAsVgpr(Register Reg) {
+ if (isVgprRegBank(Reg))
+ return Reg;
+
+ // Search for existing copy of Reg to vgpr.
+ for (MachineInstr &Use : MRI.use_instructions(Reg)) {
+ Register Def = Use.getOperand(0).getReg();
+ if (Use.getOpcode() == AMDGPU::COPY && isVgprRegBank(Def))
+ return Def;
+ }
+
+ // Copy Reg to vgpr.
+ Register VgprReg = B.buildCopy(MRI.getType(Reg), Reg).getReg(0);
+ MRI.setRegBank(VgprReg, RBI.getRegBank(AMDGPU::VGPRRegBankID));
+ return VgprReg;
+}
+
AMDGPURegBankCombinerHelper::MinMaxMedOpc
AMDGPURegBankCombinerHelper::getMinMaxPair(unsigned Opc) {
switch (Opc) {
@@ -134,7 +152,9 @@ void AMDGPURegBankCombinerHelper::applyMed3(MachineInstr &MI,
Med3MatchInfo &MatchInfo) {
B.setInstrAndDebugLoc(MI);
B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},
- {MatchInfo.Val0, MatchInfo.Val1, MatchInfo.Val2}, MI.getFlags());
+ {getAsVgpr(MatchInfo.Val0), getAsVgpr(MatchInfo.Val1),
+ getAsVgpr(MatchInfo.Val2)},
+ MI.getFlags());
MI.eraseFromParent();
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index ab3ce980c3f6..5988403c0a29 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3189,10 +3189,10 @@ unsigned AMDGPURegisterBankInfo::getMappingType(const MachineRegisterInfo &MRI,
const MachineInstr &MI) const {
unsigned RegBank = AMDGPU::InvalidRegBankID;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- if (!MI.getOperand(i).isReg())
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
continue;
- Register Reg = MI.getOperand(i).getReg();
+ Register Reg = MO.getReg();
if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) {
RegBank = regBankUnion(RegBank, Bank->getID());
if (RegBank == AMDGPU::VGPRRegBankID)
@@ -3206,10 +3206,10 @@ unsigned AMDGPURegisterBankInfo::getMappingType(const MachineRegisterInfo &MRI,
bool AMDGPURegisterBankInfo::isSALUMapping(const MachineInstr &MI) const {
const MachineFunction &MF = *MI.getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
- for (unsigned i = 0, e = MI.getNumOperands();i != e; ++i) {
- if (!MI.getOperand(i).isReg())
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
continue;
- Register Reg = MI.getOperand(i).getReg();
+ Register Reg = MO.getReg();
if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) {
if (Bank->getID() != AMDGPU::SGPRRegBankID)
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
index 1a9255f3240f..712f6dece911 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
@@ -706,9 +706,7 @@ bool AMDGPUCFGStructurizer::prepare() {
// Remove unconditional branch instr.
// Add dummy exit block iff there are multiple returns.
- for (SmallVectorImpl<MachineBasicBlock *>::const_iterator
- It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) {
- MachineBasicBlock *MBB = *It;
+ for (MachineBasicBlock *MBB : OrderedBlks) {
removeUnconditionalBranch(MBB);
removeRedundantConditionalBranch(MBB);
if (isReturnBlock(MBB)) {
@@ -851,14 +849,10 @@ bool AMDGPUCFGStructurizer::run() {
void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
int SccNum = 0;
- MachineBasicBlock *MBB;
for (scc_iterator<MachineFunction *> It = scc_begin(MF); !It.isAtEnd();
++It, ++SccNum) {
const std::vector<MachineBasicBlock *> &SccNext = *It;
- for (std::vector<MachineBasicBlock *>::const_iterator
- blockIter = SccNext.begin(), blockEnd = SccNext.end();
- blockIter != blockEnd; ++blockIter) {
- MBB = *blockIter;
+ for (MachineBasicBlock *MBB : SccNext) {
OrderedBlks.push_back(MBB);
recordSccnum(MBB, SccNum);
}
@@ -1601,11 +1595,8 @@ void AMDGPUCFGStructurizer::addDummyExitBlock(
FuncRep->push_back(DummyExitBlk); //insert to function
insertInstrEnd(DummyExitBlk, R600::RETURN);
- for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(),
- E = RetMBB.end(); It != E; ++It) {
- MachineBasicBlock *MBB = *It;
- MachineInstr *MI = getReturnInstr(MBB);
- if (MI)
+ for (MachineBasicBlock *MBB : RetMBB) {
+ if (MachineInstr *MI = getReturnInstr(MBB))
MI->eraseFromParent();
MBB->addSuccessor(DummyExitBlk);
LLVM_DEBUG(dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber()
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/CaymanInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/CaymanInstructions.td
index f4ddbf1131c3..d18dab0554bd 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/CaymanInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/CaymanInstructions.td
@@ -48,8 +48,6 @@ def SIN_cm : SIN_Common<0x8D>;
def COS_cm : COS_Common<0x8E>;
} // End isVector = 1
-def : RsqPat<RECIPSQRT_IEEE_cm, f32>;
-
def : SqrtPat<RECIPSQRT_IEEE_cm, RECIP_IEEE_cm>;
def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
index 12224cb3f797..a9a3421e8192 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
@@ -126,7 +126,6 @@ def EXP_IEEE_eg : EXP_IEEE_Common<0x81>;
def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
-def : RsqPat<RECIPSQRT_IEEE_eg, f32>;
def : SqrtPat<RECIPSQRT_IEEE_eg, RECIP_IEEE_eg>;
def SIN_eg : SIN_Common<0x8D>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index ff5d0b0af6a4..0f8dd0b3bf58 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1442,12 +1442,10 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
bool FullReg;
const MachineInstr *MI1;
- auto IsOverlappedDGEMMorXDLFn = [Reg, &IsMFMAFn, &FullReg, &MI1,
- this](const MachineInstr &MI) {
+ auto IsOverlappedMFMAFn = [Reg, &IsMFMAFn, &FullReg, &MI1,
+ this](const MachineInstr &MI) {
if (!IsMFMAFn(MI))
return false;
- if (!isDGEMM(MI.getOpcode()) && !isXDL(ST, MI))
- return false;
Register DstReg = MI.getOperand(0).getReg();
FullReg = (DstReg == Reg);
MI1 = &MI;
@@ -1458,8 +1456,8 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
getWaitStatesSinceDef(Reg, IsLegacyVALUNotDotFn, MaxWaitStates);
WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
- int NumWaitStates = getWaitStatesSinceDef(Reg, IsOverlappedDGEMMorXDLFn,
- MaxWaitStates);
+ int NumWaitStates =
+ getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn, MaxWaitStates);
if (NumWaitStates == std::numeric_limits<int>::max())
continue;
@@ -1619,12 +1617,9 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
const MachineInstr *MFMA = nullptr;
unsigned Reg;
- auto IsDGEMMorXDLWriteFn = [&Reg, &IsMFMAFn, &MFMA,
- this](const MachineInstr &MI) {
+ auto IsMFMAWriteFn = [&Reg, &IsMFMAFn, &MFMA, this](const MachineInstr &MI) {
if (!IsMFMAFn(MI) || !TRI.regsOverlap(MI.getOperand(0).getReg(), Reg))
return false;
- if (!isDGEMM(MI.getOpcode()) && !isXDL(ST, MI))
- return false;
MFMA = &MI;
return true;
};
@@ -1675,8 +1670,8 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
}
MFMA = nullptr;
- WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsDGEMMorXDLWriteFn,
- MaxWaitStates);
+ WaitStatesSinceDef =
+ getWaitStatesSinceDef(Reg, IsMFMAWriteFn, MaxWaitStates);
if (!MFMA)
continue;
@@ -1750,8 +1745,8 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
WaitStatesSinceDef);
MFMA = nullptr;
- WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsDGEMMorXDLWriteFn,
- MaxWaitStates);
+ WaitStatesSinceDef =
+ getWaitStatesSinceDef(Reg, IsMFMAWriteFn, MaxWaitStates);
if (MFMA) {
int NeedWaitStates = MaxWaitStates;
switch (TSchedModel.computeInstrLatency(MFMA)) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 3456f9a6156c..82c09378acac 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -74,11 +74,11 @@ unsigned GCNRegPressure::getRegKind(Register Reg,
assert(Reg.isVirtual());
const auto RC = MRI.getRegClass(Reg);
auto STI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
- return STI->isSGPRClass(RC) ?
- (STI->getRegSizeInBits(*RC) == 32 ? SGPR32 : SGPR_TUPLE) :
- STI->hasAGPRs(RC) ?
- (STI->getRegSizeInBits(*RC) == 32 ? AGPR32 : AGPR_TUPLE) :
- (STI->getRegSizeInBits(*RC) == 32 ? VGPR32 : VGPR_TUPLE);
+ return STI->isSGPRClass(RC)
+ ? (STI->getRegSizeInBits(*RC) == 32 ? SGPR32 : SGPR_TUPLE)
+ : STI->isAGPRClass(RC)
+ ? (STI->getRegSizeInBits(*RC) == 32 ? AGPR32 : AGPR_TUPLE)
+ : (STI->getRegSizeInBits(*RC) == 32 ? VGPR32 : VGPR_TUPLE);
}
void GCNRegPressure::inc(unsigned Reg,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp
index 1d93165f9eec..715fd69fc7ae 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp
@@ -177,9 +177,7 @@ bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
TII = ST.getInstrInfo();
- for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
- BB != BB_E; ++BB) {
- MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock &MBB : MF) {
MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
MachineBasicBlock::iterator LatestCFAlu = E;
while (I != E) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
index d5eaa33ef964..b9ca7f928d56 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
@@ -301,9 +301,7 @@ public:
const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
TII = ST.getInstrInfo();
- for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
- BB != BB_E; ++BB) {
- MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock &MBB : MF) {
MachineBasicBlock::iterator I = MBB.begin();
if (I != MBB.end() && I->getOpcode() == R600::CF_ALU)
continue; // BB was already parsed
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
index 838a497b4df1..194879fef53c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
@@ -73,9 +73,7 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
const R600RegisterInfo &TRI = TII->getRegisterInfo();
- for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
- BB != BB_E; ++BB) {
- MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock &MBB : MF) {
MachineBasicBlock::iterator I = MBB.begin();
while (I != MBB.end()) {
MachineInstr &MI = *I;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index 0215eb9f9fea..bd757e9e3d70 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -285,9 +285,8 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
- for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) {
- NewMI.add(MI.getOperand(i));
- }
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
+ NewMI.add(MO);
} else {
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Instructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Instructions.td
index 4487864888b6..b3da2fdefacc 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Instructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Instructions.td
@@ -1265,7 +1265,6 @@ let Predicates = [isR600] in {
defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL>;
- def : RsqPat<RECIPSQRT_IEEE_r600, f32>;
def : SqrtPat<RECIPSQRT_IEEE_r600, RECIP_IEEE_r600>;
def R600_ExportSwz : ExportSwzInst {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp
index 36acfafa72aa..6aee2f591b56 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp
@@ -124,11 +124,9 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
DAG->dumpNode(*SU);
} else {
dbgs() << "NO NODE \n";
- for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
- const SUnit &S = DAG->SUnits[i];
+ for (const SUnit &S : DAG->SUnits)
if (!S.isScheduled)
DAG->dumpNode(S);
- }
});
return SU;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
index 1a723279dc9f..72cf48c04e7f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
@@ -323,14 +323,12 @@ bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) {
TII = ST.getInstrInfo();
MRI = &Fn.getRegInfo();
- for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
- MBB != MBBe; ++MBB) {
- MachineBasicBlock *MB = &*MBB;
+ for (MachineBasicBlock &MB : Fn) {
PreviousRegSeq.clear();
PreviousRegSeqByReg.clear();
PreviousRegSeqByUndefCount.clear();
- for (MachineBasicBlock::iterator MII = MB->begin(), MIIE = MB->end();
+ for (MachineBasicBlock::iterator MII = MB.begin(), MIIE = MB.end();
MII != MIIE; ++MII) {
MachineInstr &MI = *MII;
if (MI.getOpcode() != R600::REG_SEQUENCE) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Packetizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
index e858bba2983c..beb0aad86e89 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
@@ -343,20 +343,11 @@ bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
// dependence between Insn 0 and Insn 2. This can lead to incorrect
// packetization
//
- for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
- MBB != MBBe; ++MBB) {
- MachineBasicBlock::iterator End = MBB->end();
- MachineBasicBlock::iterator MI = MBB->begin();
- while (MI != End) {
- if (MI->isKill() || MI->getOpcode() == R600::IMPLICIT_DEF ||
- (MI->getOpcode() == R600::CF_ALU && !MI->getOperand(8).getImm())) {
- MachineBasicBlock::iterator DeleteMI = MI;
- ++MI;
- MBB->erase(DeleteMI);
- End = MBB->end();
- continue;
- }
- ++MI;
+ for (MachineBasicBlock &MBB : Fn) {
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
+ if (MI.isKill() || MI.getOpcode() == R600::IMPLICIT_DEF ||
+ (MI.getOpcode() == R600::CF_ALU && !MI.getOperand(8).getImm()))
+ MBB.erase(MI);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIDefines.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIDefines.h
index 777744f08cde..580e4bc417a4 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -18,7 +18,8 @@ namespace llvm {
enum SIRCFlags : uint8_t {
// For vector registers.
HasVGPR = 1 << 0,
- HasAGPR = 1 << 1
+ HasAGPR = 1 << 1,
+ HasSGPR = 1 << 2
}; // enum SIRCFlags
namespace SIInstrFlags {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index cf93a63f26a0..f54778535b7c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -127,11 +127,11 @@ FunctionPass *llvm::createSIFixSGPRCopiesPass() {
static bool hasVectorOperands(const MachineInstr &MI,
const SIRegisterInfo *TRI) {
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- if (!MI.getOperand(i).isReg() || !MI.getOperand(i).getReg().isVirtual())
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.getReg().isVirtual())
continue;
- if (TRI->hasVectorRegisters(MRI.getRegClass(MI.getOperand(i).getReg())))
+ if (TRI->hasVectorRegisters(MRI.getRegClass(MO.getReg())))
return true;
}
return false;
@@ -259,7 +259,7 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
// VGPRz = REG_SEQUENCE VGPRx, sub0
MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());
- bool IsAGPR = TRI->hasAGPRs(DstRC);
+ bool IsAGPR = TRI->isAGPRClass(DstRC);
for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
Register SrcReg = MI.getOperand(I).getReg();
@@ -853,7 +853,7 @@ MachineBasicBlock *SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
Register PHIRes = MI.getOperand(0).getReg();
const TargetRegisterClass *RC0 = MRI->getRegClass(PHIRes);
- if (AllAGPRUses && numVGPRUses && !TRI->hasAGPRs(RC0)) {
+ if (AllAGPRUses && numVGPRUses && !TRI->isAGPRClass(RC0)) {
LLVM_DEBUG(dbgs() << "Moving PHI to AGPR: " << MI);
MRI->setRegClass(PHIRes, TRI->getEquivalentAGPRClass(RC0));
for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index a3a0e9c9b9ac..200e00ee5521 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1586,17 +1586,9 @@ bool SIFoldOperands::tryFoldRegSequence(MachineInstr &MI) {
unsigned OpIdx = Op - &UseMI->getOperand(0);
const MCInstrDesc &InstDesc = UseMI->getDesc();
- const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
- switch (OpInfo.RegClass) {
- case AMDGPU::AV_32RegClassID: LLVM_FALLTHROUGH;
- case AMDGPU::AV_64RegClassID: LLVM_FALLTHROUGH;
- case AMDGPU::AV_96RegClassID: LLVM_FALLTHROUGH;
- case AMDGPU::AV_128RegClassID: LLVM_FALLTHROUGH;
- case AMDGPU::AV_160RegClassID:
- break;
- default:
+ if (!TRI->isVectorSuperClass(
+ TRI->getRegClass(InstDesc.OpInfo[OpIdx].RegClass)))
return false;
- }
const auto *NewDstRC = TRI->getEquivalentAGPRClass(MRI->getRegClass(Reg));
auto Dst = MRI->createVirtualRegister(NewDstRC);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 882b9a203755..4706c74be721 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -1364,6 +1364,34 @@ bool SIFrameLowering::assignCalleeSavedSpillSlots(
return false;
}
+bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
+ const MachineFunction &MF) const {
+
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ uint64_t EstStackSize = MFI.estimateStackSize(MF);
+ uint64_t MaxOffset = EstStackSize - 1;
+
+ // We need the emergency stack slots to be allocated in range of the
+ // MUBUF/flat scratch immediate offset from the base register, so assign these
+ // first at the incoming SP position.
+ //
+ // TODO: We could try sorting the objects to find a hole in the first bytes
+ // rather than allocating as close to possible. This could save a lot of space
+ // on frames with alignment requirements.
+ if (ST.enableFlatScratch()) {
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ if (TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
+ SIInstrFlags::FlatScratch))
+ return false;
+ } else {
+ if (SIInstrInfo::isLegalMUBUFImmOffset(MaxOffset))
+ return false;
+ }
+
+ return true;
+}
+
MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
MachineFunction &MF,
MachineBasicBlock &MBB,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.h
index 951ea79b2809..56fbb875ffd9 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIFrameLowering.h
@@ -43,6 +43,9 @@ public:
const TargetRegisterInfo *TRI,
std::vector<CalleeSavedInfo> &CSI) const override;
+ bool allocateScavengingFrameIndexesNearIncomingSP(
+ const MachineFunction &MF) const override;
+
bool isSupportedStackID(TargetStackID::Value ID) const override;
void processFunctionBeforeFrameFinalized(
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 519c5b936536..35b72f5d201b 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -809,6 +809,11 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SMULO, MVT::i64, Custom);
setOperationAction(ISD::UMULO, MVT::i64, Custom);
+ if (Subtarget->hasMad64_32()) {
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
+ }
+
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
@@ -919,6 +924,16 @@ bool SITargetLowering::isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode,
!hasFP32Denormals(DAG.getMachineFunction());
}
+bool SITargetLowering::isFPExtFoldable(const MachineInstr &MI, unsigned Opcode,
+ LLT DestTy, LLT SrcTy) const {
+ return ((Opcode == TargetOpcode::G_FMAD && Subtarget->hasMadMixInsts()) ||
+ (Opcode == TargetOpcode::G_FMA && Subtarget->hasFmaMixInsts())) &&
+ DestTy.getScalarSizeInBits() == 32 &&
+ SrcTy.getScalarSizeInBits() == 16 &&
+ // TODO: This probably only requires no input flushing?
+ !hasFP32Denormals(*MI.getMF());
+}
+
bool SITargetLowering::isShuffleMaskLegal(ArrayRef<int>, EVT) const {
// SI has some legal vector types, but no legal vector operations. Say no
// shuffles are legal in order to prefer scalarizing some vector operations.
@@ -4290,8 +4305,8 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
MachineInstrBuilder MIB;
MIB = BuildMI(*BB, MI, DL, TII->get(AMDGPU::SI_CALL), ReturnAddrReg);
- for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
- MIB.add(MI.getOperand(I));
+ for (const MachineOperand &MO : MI.operands())
+ MIB.add(MO);
MIB.cloneMemRefs(MI);
MI.eraseFromParent();
@@ -4457,6 +4472,8 @@ bool SITargetLowering::enableAggressiveFMAFusion(EVT VT) const {
return true;
}
+bool SITargetLowering::enableAggressiveFMAFusion(LLT Ty) const { return true; }
+
EVT SITargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
EVT VT) const {
if (!VT.isVector()) {
@@ -4522,6 +4539,34 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
return false;
}
+bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ LLT Ty) const {
+ switch (Ty.getScalarSizeInBits()) {
+ case 16:
+ return isFMAFasterThanFMulAndFAdd(MF, MVT::f16);
+ case 32:
+ return isFMAFasterThanFMulAndFAdd(MF, MVT::f32);
+ case 64:
+ return isFMAFasterThanFMulAndFAdd(MF, MVT::f64);
+ default:
+ break;
+ }
+
+ return false;
+}
+
+bool SITargetLowering::isFMADLegal(const MachineInstr &MI, LLT Ty) const {
+ if (!Ty.isScalar())
+ return false;
+
+ if (Ty.getScalarSizeInBits() == 16)
+ return Subtarget->hasMadF16() && !hasFP64FP16Denormals(*MI.getMF());
+ if (Ty.getScalarSizeInBits() == 32)
+ return Subtarget->hasMadMacF32Insts() && !hasFP32Denormals(*MI.getMF());
+
+ return false;
+}
+
bool SITargetLowering::isFMADLegal(const SelectionDAG &DAG,
const SDNode *N) const {
// TODO: Check future ftz flag
@@ -4691,6 +4736,9 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SMULO:
case ISD::UMULO:
return lowerXMULO(Op, DAG);
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI:
+ return lowerXMUL_LOHI(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG);
}
@@ -5304,6 +5352,21 @@ SDValue SITargetLowering::lowerXMULO(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues({ Result, Overflow }, SL);
}
+SDValue SITargetLowering::lowerXMUL_LOHI(SDValue Op, SelectionDAG &DAG) const {
+ if (Op->isDivergent()) {
+ // Select to V_MAD_[IU]64_[IU]32.
+ return Op;
+ }
+ if (Subtarget->hasSMulHi()) {
+ // Expand to S_MUL_I32 + S_MUL_HI_[IU]32.
+ return SDValue();
+ }
+ // The multiply is uniform but we would have to use V_MUL_HI_[IU]32 to
+ // calculate the high part, so we might as well do the whole thing with
+ // V_MAD_[IU]64_[IU]32.
+ return Op;
+}
+
SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const {
if (!Subtarget->isTrapHandlerEnabled() ||
Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA)
@@ -9790,10 +9853,9 @@ bool SITargetLowering::isCanonicalized(Register Reg, MachineFunction &MF,
if (Subtarget->supportsMinMaxDenormModes() ||
denormalsEnabledForType(MRI.getType(Reg), MF))
return true;
- for (unsigned I = 1, E = MI->getNumOperands(); I != E; ++I) {
- if (!isCanonicalized(MI->getOperand(I).getReg(), MF, MaxDepth - 1))
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands()))
+ if (!isCanonicalized(MO.getReg(), MF, MaxDepth - 1))
return false;
- }
return true;
}
default:
@@ -11460,15 +11522,15 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
if (I == -1)
break;
MachineOperand &Op = MI.getOperand(I);
- if ((OpInfo[I].RegClass != llvm::AMDGPU::AV_64RegClassID &&
- OpInfo[I].RegClass != llvm::AMDGPU::AV_32RegClassID) ||
- !Op.getReg().isVirtual() || !TRI->isAGPR(MRI, Op.getReg()))
+ if (!Op.isReg() || !Op.getReg().isVirtual())
+ continue;
+ auto *RC = TRI->getRegClassForReg(MRI, Op.getReg());
+ if (!TRI->hasAGPRs(RC))
continue;
auto *Src = MRI.getUniqueVRegDef(Op.getReg());
if (!Src || !Src->isCopy() ||
!TRI->isSGPRReg(MRI, Src->getOperand(1).getReg()))
continue;
- auto *RC = TRI->getRegClassForReg(MRI, Op.getReg());
auto *NewRC = TRI->getEquivalentVGPRClass(RC);
// All uses of agpr64 and agpr32 can also accept vgpr except for
// v_accvgpr_read, but we do not produce agpr reads during selection,
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 1e48c96ad3c8..1315cc15dd02 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -135,6 +135,7 @@ private:
SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerXMULO(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerXMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
SDValue getSegmentAperture(unsigned AS, const SDLoc &DL,
SelectionDAG &DAG) const;
@@ -252,6 +253,9 @@ public:
bool isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode, EVT DestVT,
EVT SrcVT) const override;
+ bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy,
+ LLT SrcTy) const override;
+
bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const override;
bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
@@ -377,6 +381,7 @@ public:
bool hasBitPreservingFPLogic(EVT VT) const override;
bool enableAggressiveFMAFusion(EVT VT) const override;
+ bool enableAggressiveFMAFusion(LLT Ty) const override;
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override;
@@ -384,7 +389,10 @@ public:
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT VT) const override;
+ bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ const LLT Ty) const override;
bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const override;
+ bool isFMADLegal(const MachineInstr &MI, const LLT Ty) const override;
SDValue splitUnaryVectorOp(SDValue Op, SelectionDAG &DAG) const;
SDValue splitBinaryVectorOp(SDValue Op, SelectionDAG &DAG) const;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index f4e5771d2a2a..c9d9dd1fb82c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -150,6 +150,8 @@ enum VmemType {
VMEM_NOSAMPLER,
// MIMG instructions with a sampler.
VMEM_SAMPLER,
+ // BVH instructions
+ VMEM_BVH
};
VmemType getVmemType(const MachineInstr &Inst) {
@@ -157,9 +159,10 @@ VmemType getVmemType(const MachineInstr &Inst) {
if (!SIInstrInfo::isMIMG(Inst))
return VMEM_NOSAMPLER;
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Inst.getOpcode());
- return AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode)->Sampler
- ? VMEM_SAMPLER
- : VMEM_NOSAMPLER;
+ const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo =
+ AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
+ return BaseInfo->BVH ? VMEM_BVH
+ : BaseInfo->Sampler ? VMEM_SAMPLER : VMEM_NOSAMPLER;
}
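The classification above is a simple priority chain: BVH (ray-tracing) MIMG instructions first, then MIMG instructions with a sampler, then everything else. A hedged, standalone restatement of that logic with a hypothetical descriptor struct (not the real MIMGBaseOpcodeInfo):

enum VmemKind { NoSampler, Sampler, Bvh };

struct BaseOpcodeDesc { // illustrative stand-in for MIMGBaseOpcodeInfo
  bool IsBVH;
  bool HasSampler;
};

static VmemKind classify(const BaseOpcodeDesc &D) {
  if (D.IsBVH)                          // BVH takes priority over the sampler bit
    return Bvh;
  return D.HasSampler ? Sampler : NoSampler;
}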
void addWait(AMDGPU::Waitcnt &Wait, InstCounterType T, unsigned Count) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 4a928123b68f..92f5322b8ad2 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -898,10 +898,10 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned EltSize = 4;
unsigned Opcode = AMDGPU::V_MOV_B32_e32;
- if (RI.hasAGPRs(RC)) {
+ if (RI.isAGPRClass(RC)) {
Opcode = (RI.hasVGPRs(SrcRC)) ?
AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
- } else if (RI.hasVGPRs(RC) && RI.hasAGPRs(SrcRC)) {
+ } else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) {
Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
} else if ((Size % 64 == 0) && RI.hasVGPRs(RC) &&
(RI.isProperlyAlignedRC(*RC) &&
@@ -1205,7 +1205,7 @@ Register SIInstrInfo::insertNE(MachineBasicBlock *MBB,
unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
- if (RI.hasAGPRs(DstRC))
+ if (RI.isAGPRClass(DstRC))
return AMDGPU::COPY;
if (RI.getRegSizeInBits(*DstRC) == 32) {
return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
@@ -1435,6 +1435,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
FrameInfo.getObjectAlign(FrameIndex));
unsigned SpillSize = TRI->getSpillSize(*RC);
+ MachineRegisterInfo &MRI = MF->getRegInfo();
if (RI.isSGPRClass(RC)) {
MFI->setHasSpilledSGPRs();
assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
@@ -1448,7 +1449,6 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
// The SGPR spill/restore instructions only work on numbered SGPRs, so we need
// to make sure we are using the correct register class.
if (SrcReg.isVirtual() && SpillSize == 4) {
- MachineRegisterInfo &MRI = MF->getRegInfo();
MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
}
@@ -1463,10 +1463,21 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
return;
}
- unsigned Opcode = RI.hasAGPRs(RC) ? getAGPRSpillSaveOpcode(SpillSize)
- : getVGPRSpillSaveOpcode(SpillSize);
+ unsigned Opcode = RI.isAGPRClass(RC) ? getAGPRSpillSaveOpcode(SpillSize)
+ : getVGPRSpillSaveOpcode(SpillSize);
MFI->setHasSpilledVGPRs();
+ if (RI.isVectorSuperClass(RC)) {
+ // Convert an AV spill into a VGPR spill. Introduce a copy from the AV
+ // register to an equivalent VGPR register beforehand. Regalloc may
+ // introduce AV spills that are only relevant until the rewriter runs,
+ // at which point they become either VGPR or AGPR spills.
+ Register TmpVReg = MRI.createVirtualRegister(RI.getEquivalentVGPRClass(RC));
+ BuildMI(MBB, MI, DL, get(TargetOpcode::COPY), TmpVReg)
+ .addReg(SrcReg, RegState::Kill);
+ SrcReg = TmpVReg;
+ }
+
BuildMI(MBB, MI, DL, get(Opcode))
.addReg(SrcReg, getKillRegState(isKill)) // data
.addFrameIndex(FrameIndex) // addr
@@ -1598,13 +1609,26 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
return;
}
- unsigned Opcode = RI.hasAGPRs(RC) ? getAGPRSpillRestoreOpcode(SpillSize)
- : getVGPRSpillRestoreOpcode(SpillSize);
+ unsigned Opcode = RI.isAGPRClass(RC) ? getAGPRSpillRestoreOpcode(SpillSize)
+ : getVGPRSpillRestoreOpcode(SpillSize);
+
+ bool IsVectorSuperClass = RI.isVectorSuperClass(RC);
+ Register TmpReg = DestReg;
+ if (IsVectorSuperClass) {
+ // For AV classes, restore the spill into a VGPR and then copy it into the
+ // equivalent AV register.
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ DestReg = MRI.createVirtualRegister(RI.getEquivalentVGPRClass(RC));
+ }
BuildMI(MBB, MI, DL, get(Opcode), DestReg)
.addFrameIndex(FrameIndex) // vaddr
.addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
.addImm(0) // offset
.addMemOperand(MMO);
+
+ if (IsVectorSuperClass)
+ BuildMI(MBB, MI, DL, get(TargetOpcode::COPY), TmpReg)
+ .addReg(DestReg, RegState::Kill);
}
void SIInstrInfo::insertNoop(MachineBasicBlock &MBB,
@@ -2802,12 +2826,11 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
}
if (Is16Bit) {
- if (isVGPRCopy)
- return false; // Do not clobber vgpr_hi16
+ if (isVGPRCopy)
+ return false; // Do not clobber vgpr_hi16
- if (DstReg.isVirtual() &&
- UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)
- return false;
+ if (DstReg.isVirtual() && UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)
+ return false;
UseMI.getOperand(0).setSubReg(0);
if (DstReg.isPhysical()) {
@@ -3896,9 +3919,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
// verification is broken anyway
if (ST.needsAlignedVGPRs()) {
const TargetRegisterClass *RC = RI.getRegClassForReg(MRI, Reg);
- const bool IsVGPR = RI.hasVGPRs(RC);
- const bool IsAGPR = !IsVGPR && RI.hasAGPRs(RC);
- if ((IsVGPR || IsAGPR) && MO.getSubReg()) {
+ if (RI.hasVectorRegisters(RC) && MO.getSubReg()) {
const TargetRegisterClass *SubRC =
RI.getSubRegClass(RC, MO.getSubReg());
RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.getSubReg());
@@ -5522,13 +5543,13 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
if (getOpRegClass(MI, 0) == &AMDGPU::VReg_1RegClass) {
VRC = &AMDGPU::VReg_1RegClass;
} else
- VRC = RI.hasAGPRs(getOpRegClass(MI, 0))
+ VRC = RI.isAGPRClass(getOpRegClass(MI, 0))
? RI.getEquivalentAGPRClass(SRC)
: RI.getEquivalentVGPRClass(SRC);
} else {
- VRC = RI.hasAGPRs(getOpRegClass(MI, 0))
- ? RI.getEquivalentAGPRClass(VRC)
- : RI.getEquivalentVGPRClass(VRC);
+ VRC = RI.isAGPRClass(getOpRegClass(MI, 0))
+ ? RI.getEquivalentAGPRClass(VRC)
+ : RI.getEquivalentVGPRClass(VRC);
}
RC = VRC;
} else {
@@ -7065,8 +7086,8 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
case AMDGPU::STRICT_WWM:
case AMDGPU::STRICT_WQM: {
const TargetRegisterClass *SrcRC = getOpRegClass(Inst, 1);
- if (RI.hasAGPRs(SrcRC)) {
- if (RI.hasAGPRs(NewDstRC))
+ if (RI.isAGPRClass(SrcRC)) {
+ if (RI.isAGPRClass(NewDstRC))
return nullptr;
switch (Inst.getOpcode()) {
@@ -7082,7 +7103,7 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
if (!NewDstRC)
return nullptr;
} else {
- if (RI.hasVGPRs(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
+ if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
return nullptr;
NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 8c24268e379e..47ee83eb9351 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2246,7 +2246,7 @@ class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
let HasExtSDWA9 = 0;
}
-class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.Pattern> : VOPProfile <p.ArgVT> {
+class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.NoPattern> : VOPProfile <p.ArgVT> {
let NeedPatGen = mode;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td
index d5f9cb8ba493..d55d8da8699a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -827,10 +827,6 @@ def : Pat <
let OtherPredicates = [UnsafeFPMath] in {
-//defm : RsqPat<V_RSQ_F32_e32, f32>;
-
-def : RsqPat<V_RSQ_F32_e32, f32>;
-
// Convert (x - floor(x)) to fract(x)
def : GCNPat <
(f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)),
@@ -1372,61 +1368,48 @@ def : GCNPat <
>;
}
+
/********** ================================ **********/
/********** Floating point absolute/negative **********/
/********** ================================ **********/
-// Prevent expanding both fneg and fabs.
-// TODO: Add IgnoredBySelectionDAG bit?
-let AddedComplexity = 1 in { // Prefer SALU to VALU patterns for DAG
-
def : GCNPat <
- (fneg (fabs (f32 SReg_32:$src))),
+ (UniformUnaryFrag<fneg> (fabs (f32 SReg_32:$src))),
(S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80000000))) // Set sign bit
>;
def : GCNPat <
- (fabs (f32 SReg_32:$src)),
+ (UniformUnaryFrag<fabs> (f32 SReg_32:$src)),
(S_AND_B32 SReg_32:$src, (S_MOV_B32 (i32 0x7fffffff)))
>;
def : GCNPat <
- (fneg (f32 SReg_32:$src)),
+ (UniformUnaryFrag<fneg> (f32 SReg_32:$src)),
(S_XOR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80000000)))
>;
def : GCNPat <
- (fneg (f16 SReg_32:$src)),
+ (UniformUnaryFrag<fneg> (f16 SReg_32:$src)),
(S_XOR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x00008000)))
>;
def : GCNPat <
- (fneg (f16 VGPR_32:$src)),
- (V_XOR_B32_e32 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src)
->;
-
-def : GCNPat <
- (fabs (f16 SReg_32:$src)),
+ (UniformUnaryFrag<fabs> (f16 SReg_32:$src)),
(S_AND_B32 SReg_32:$src, (S_MOV_B32 (i32 0x00007fff)))
>;
def : GCNPat <
- (fneg (fabs (f16 SReg_32:$src))),
+ (UniformUnaryFrag<fneg> (fabs (f16 SReg_32:$src))),
(S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x00008000))) // Set sign bit
>;
def : GCNPat <
- (fneg (fabs (f16 VGPR_32:$src))),
- (V_OR_B32_e32 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src) // Set sign bit
->;
-
-def : GCNPat <
- (fneg (v2f16 SReg_32:$src)),
+ (UniformUnaryFrag<fneg> (v2f16 SReg_32:$src)),
(S_XOR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80008000)))
>;
def : GCNPat <
- (fabs (v2f16 SReg_32:$src)),
+ (UniformUnaryFrag<fabs> (v2f16 SReg_32:$src)),
(S_AND_B32 SReg_32:$src, (S_MOV_B32 (i32 0x7fff7fff)))
>;
@@ -1435,51 +1418,20 @@ def : GCNPat <
// fabs is not reported as free because there is modifier for it in
// VOP3P instructions, so it is turned into the bit op.
def : GCNPat <
- (fneg (v2f16 (bitconvert (and_oneuse (i32 SReg_32:$src), 0x7fff7fff)))),
+ (UniformUnaryFrag<fneg> (v2f16 (bitconvert (and_oneuse (i32 SReg_32:$src), 0x7fff7fff)))),
(S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80008000))) // Set sign bit
>;
def : GCNPat <
- (fneg (v2f16 (fabs SReg_32:$src))),
+ (UniformUnaryFrag<fneg> (v2f16 (fabs SReg_32:$src))),
(S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80008000))) // Set sign bit
>;
-// FIXME: The implicit-def of scc from S_[X]OR/AND_B32 is mishandled
-// def : GCNPat <
-// (fneg (f64 SReg_64:$src)),
-// (REG_SEQUENCE SReg_64,
-// (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)),
-// sub0,
-// (S_XOR_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)),
-// (i32 (S_MOV_B32 (i32 0x80000000)))),
-// sub1)
-// >;
-
-// def : GCNPat <
-// (fneg (fabs (f64 SReg_64:$src))),
-// (REG_SEQUENCE SReg_64,
-// (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)),
-// sub0,
-// (S_OR_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)),
-// (S_MOV_B32 (i32 0x80000000))), // Set sign bit.
-// sub1)
-// >;
-
-// FIXME: Use S_BITSET0_B32/B64?
-// def : GCNPat <
-// (fabs (f64 SReg_64:$src)),
-// (REG_SEQUENCE SReg_64,
-// (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)),
-// sub0,
-// (S_AND_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)),
-// (i32 (S_MOV_B32 (i32 0x7fffffff)))),
-// sub1)
-// >;
// COPY_TO_REGCLASS is needed to avoid using SCC from S_XOR_B32 instead
// of the real value.
def : GCNPat <
- (fneg (v2f32 SReg_64:$src)),
+ (UniformUnaryFrag<fneg> (v2f32 SReg_64:$src)),
(v2f32 (REG_SEQUENCE SReg_64,
(f32 (COPY_TO_REGCLASS (S_XOR_B32 (i32 (EXTRACT_SUBREG $src, sub0)),
(i32 (S_MOV_B32 (i32 0x80000000)))),
@@ -1489,36 +1441,103 @@ def : GCNPat <
SReg_32)), sub1))
>;
-} // End let AddedComplexity = 1
+def : GCNPat <
+ (UniformUnaryFrag<fabs> (v2f32 SReg_64:$src)),
+ (v2f32 (REG_SEQUENCE SReg_64,
+ (f32 (COPY_TO_REGCLASS (S_AND_B32 (i32 (EXTRACT_SUBREG $src, sub0)),
+ (i32 (S_MOV_B32 (i32 0x7fffffff)))),
+ SReg_32)), sub0,
+ (f32 (COPY_TO_REGCLASS (S_AND_B32 (i32 (EXTRACT_SUBREG $src, sub1)),
+ (i32 (S_MOV_B32 (i32 0x7fffffff)))),
+ SReg_32)), sub1))
+>;
+
+def : GCNPat <
+ (UniformUnaryFrag<fneg> (fabs (v2f32 SReg_64:$src))),
+ (v2f32 (REG_SEQUENCE SReg_64,
+ (f32 (COPY_TO_REGCLASS (S_OR_B32 (i32 (EXTRACT_SUBREG $src, sub0)),
+ (i32 (S_MOV_B32 (i32 0x80000000)))),
+ SReg_32)), sub0,
+ (f32 (COPY_TO_REGCLASS (S_OR_B32 (i32 (EXTRACT_SUBREG $src, sub1)),
+ (i32 (S_MOV_B32 (i32 0x80000000)))),
+ SReg_32)), sub1))
+>;
+
+// FIXME: Use S_BITSET0_B32/B64?
+def : GCNPat <
+ (UniformUnaryFrag<fabs> (f64 SReg_64:$src)),
+ (REG_SEQUENCE SReg_64,
+ (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)),
+ sub0,
+ (i32 (COPY_TO_REGCLASS (S_AND_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)),
+ (S_MOV_B32 (i32 0x7fffffff))), SReg_32)), // Set sign bit.
+ sub1)
+>;
+
+def : GCNPat <
+ (UniformUnaryFrag<fneg> (f64 SReg_64:$src)),
+ (REG_SEQUENCE SReg_64,
+ (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)),
+ sub0,
+ (i32 (COPY_TO_REGCLASS (S_XOR_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)),
+ (i32 (S_MOV_B32 (i32 0x80000000)))), SReg_32)),
+ sub1)
+>;
+
+def : GCNPat <
+ (UniformUnaryFrag<fneg> (fabs (f64 SReg_64:$src))),
+ (REG_SEQUENCE SReg_64,
+ (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)),
+ sub0,
+ (i32 (COPY_TO_REGCLASS (S_OR_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)),
+ (S_MOV_B32 (i32 0x80000000))), SReg_32)),// Set sign bit.
+ sub1)
+>;
+
+
+def : GCNPat <
+ (fneg (fabs (f32 VGPR_32:$src))),
+ (V_OR_B32_e64 (S_MOV_B32 (i32 0x80000000)), VGPR_32:$src) // Set sign bit
+>;
def : GCNPat <
(fabs (f32 VGPR_32:$src)),
- (V_AND_B32_e32 (S_MOV_B32 (i32 0x7fffffff)), VGPR_32:$src)
+ (V_AND_B32_e64 (S_MOV_B32 (i32 0x7fffffff)), VGPR_32:$src)
>;
def : GCNPat <
(fneg (f32 VGPR_32:$src)),
- (V_XOR_B32_e32 (S_MOV_B32 (i32 0x80000000)), VGPR_32:$src)
+ (V_XOR_B32_e64 (S_MOV_B32 (i32 0x80000000)), VGPR_32:$src)
>;
def : GCNPat <
(fabs (f16 VGPR_32:$src)),
- (V_AND_B32_e32 (S_MOV_B32 (i32 0x00007fff)), VGPR_32:$src)
+ (V_AND_B32_e64 (S_MOV_B32 (i32 0x00007fff)), VGPR_32:$src)
+>;
+
+def : GCNPat <
+ (fneg (f16 VGPR_32:$src)),
+ (V_XOR_B32_e64 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src)
+>;
+
+def : GCNPat <
+ (fneg (fabs (f16 VGPR_32:$src))),
+ (V_OR_B32_e64 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src) // Set sign bit
>;
def : GCNPat <
(fneg (v2f16 VGPR_32:$src)),
- (V_XOR_B32_e32 (S_MOV_B32 (i32 0x80008000)), VGPR_32:$src)
+ (V_XOR_B32_e64 (S_MOV_B32 (i32 0x80008000)), VGPR_32:$src)
>;
def : GCNPat <
(fabs (v2f16 VGPR_32:$src)),
- (V_AND_B32_e32 (S_MOV_B32 (i32 0x7fff7fff)), VGPR_32:$src)
+ (V_AND_B32_e64 (S_MOV_B32 (i32 0x7fff7fff)), VGPR_32:$src)
>;
def : GCNPat <
(fneg (v2f16 (fabs VGPR_32:$src))),
- (V_OR_B32_e32 (S_MOV_B32 (i32 0x80008000)), VGPR_32:$src) // Set sign bit
+ (V_OR_B32_e64 (S_MOV_B32 (i32 0x80008000)), VGPR_32:$src)
>;
def : GCNPat <
@@ -1526,30 +1545,28 @@ def : GCNPat <
(REG_SEQUENCE VReg_64,
(i32 (EXTRACT_SUBREG VReg_64:$src, sub0)),
sub0,
- (V_AND_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$src, sub1)),
- (V_MOV_B32_e32 (i32 0x7fffffff))), // Set sign bit.
+ (V_AND_B32_e64 (i32 (S_MOV_B32 (i32 0x7fffffff))),
+ (i32 (EXTRACT_SUBREG VReg_64:$src, sub1))),
sub1)
>;
-// TODO: Use SGPR for constant
def : GCNPat <
(fneg (f64 VReg_64:$src)),
(REG_SEQUENCE VReg_64,
(i32 (EXTRACT_SUBREG VReg_64:$src, sub0)),
sub0,
- (V_XOR_B32_e32 (i32 (EXTRACT_SUBREG VReg_64:$src, sub1)),
- (i32 (V_MOV_B32_e32 (i32 0x80000000)))),
+ (V_XOR_B32_e64 (i32 (S_MOV_B32 (i32 0x80000000))),
+ (i32 (EXTRACT_SUBREG VReg_64:$src, sub1))),
sub1)
>;
-// TODO: Use SGPR for constant
def : GCNPat <
(fneg (fabs (f64 VReg_64:$src))),
(REG_SEQUENCE VReg_64,
(i32 (EXTRACT_SUBREG VReg_64:$src, sub0)),
sub0,
- (V_OR_B32_e32 (i32 (EXTRACT_SUBREG VReg_64:$src, sub1)),
- (V_MOV_B32_e32 (i32 0x80000000))), // Set sign bit.
+ (V_OR_B32_e64 (i32 (S_MOV_B32 (i32 0x80000000))),
+ (i32 (EXTRACT_SUBREG VReg_64:$src, sub1))),
sub1)
>;
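All of the patterns in this block implement fneg/fabs as plain integer bit operations on the IEEE-754 encoding: fabs clears the sign bit (AND with 0x7fffffff for f32, 0x7fff per f16 half), fneg toggles it (XOR with 0x80000000 / 0x8000), and fneg(fabs(x)) sets it (OR). A small self-contained C++ illustration of the same trick for f32, assuming IEEE-754 binary32 floats as on the target; the helper names are illustrative:

#include <cstdint>
#include <cstring>

static float bitop_fneg(float X) {        // matches the XOR 0x80000000 patterns
  uint32_t Bits;
  std::memcpy(&Bits, &X, sizeof(Bits));
  Bits ^= 0x80000000u;                    // toggle the sign bit
  std::memcpy(&X, &Bits, sizeof(Bits));
  return X;
}

static float bitop_fabs(float X) {        // matches the AND 0x7fffffff patterns
  uint32_t Bits;
  std::memcpy(&Bits, &X, sizeof(Bits));
  Bits &= 0x7fffffffu;                    // clear the sign bit
  std::memcpy(&X, &Bits, sizeof(Bits));
  return X;
}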
@@ -1681,14 +1698,9 @@ def : GCNPat <
/********** Intrinsic Patterns **********/
/********** ================== **********/
-let OtherPredicates = [isNotGFX90APlus] in
-// FIXME: Should use _e64 and select source modifiers.
-def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>;
-
-let OtherPredicates = [isGFX90APlus] in
def : GCNPat <
- (fpow f32:$src0, f32:$src1),
- (V_EXP_F32_e32 (V_MUL_LEGACY_F32_e64 0, f32:$src1, SRCMODS.NONE, (V_LOG_F32_e32 f32:$src0), 0, 0))
+ (f32 (fpow (VOP3Mods f32:$src0, i32:$src0_mods), (VOP3Mods f32:$src1, i32:$src1_mods))),
+ (V_EXP_F32_e64 SRCMODS.NONE, (V_MUL_LEGACY_F32_e64 $src1_mods, $src1, SRCMODS.NONE, (V_LOG_F32_e64 $src0_mods, $src0), 0, 0))
>;
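The fpow pattern rests on the identity pow(x, y) = exp2(y * log2(x)) for x > 0, mapped onto V_LOG_F32, V_MUL_LEGACY_F32 and V_EXP_F32 (which are base-2 log and exp); the legacy multiply is presumably chosen for its DX9-style 0-times-anything-is-0 behaviour on special inputs, a hardware detail not restated here. A plain C++ sketch of the identity itself, with an illustrative function name:

#include <cmath>

// Hedged sketch of the identity behind the pattern: for X > 0,
//   pow(X, Y) == exp2(Y * log2(X))
// which is what V_EXP_F32(V_MUL_LEGACY_F32(Y, V_LOG_F32(X))) computes.
static float pow_via_exp2_log2(float X, float Y) {
  return std::exp2(Y * std::log2(X));
}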
def : GCNPat <
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 34cbb49dcd16..f4d9002e930e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -1609,7 +1609,7 @@ SILoadStoreOptimizer::getTargetRegisterClass(const CombineInfo &CI,
}
unsigned BitWidth = 32 * (CI.Width + Paired.Width);
- return TRI->hasAGPRs(getDataRegClass(*CI.I))
+ return TRI->isAGPRClass(getDataRegClass(*CI.I))
? TRI->getAGPRClassForBitWidth(BitWidth)
: TRI->getVGPRClassForBitWidth(BitWidth);
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 29f072ca1e6c..fff4f6729c99 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -795,6 +795,8 @@ bool SIGfx6CacheControl::enableLoadCacheBypass(
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
+ // Set L1 cache policy to MISS_EVICT.
+ // Note: there is no L2 cache bypass policy at the ISA level.
Changed |= enableGLCBit(MI);
break;
case SIAtomicScope::WORKGROUP:
@@ -837,8 +839,10 @@ bool SIGfx6CacheControl::enableRMWCacheBypass(
assert(MI->mayLoad() && MI->mayStore());
bool Changed = false;
- /// The L1 cache is write through so does not need to be bypassed. There is no
- /// bypass control for the L2 cache at the isa level.
+ /// Do not set GLC for RMW atomic operations as L0/L1 cache is automatically
+ /// bypassed, and the GLC bit is instead used to indicate if they are
+ /// return or no-return.
+ /// Note: there is no L2 cache coherent bypass control at the ISA level.
return Changed;
}
@@ -860,6 +864,9 @@ bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal(
bool Changed = false;
if (IsVolatile) {
+ // Set L1 cache policy to be MISS_EVICT for load instructions
+ // and MISS_LRU for store instructions.
+ // Note: there is no L2 cache bypass policy at the ISA level.
if (Op == SIMemOp::LOAD)
Changed |= enableGLCBit(MI);
@@ -875,7 +882,8 @@ bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal(
}
if (IsNonTemporal) {
- // Request L1 MISS_EVICT and L2 STREAM for load and store instructions.
+ // Setting both GLC and SLC configures L1 cache policy to MISS_EVICT
+ // for both loads and stores, and the L2 cache policy to STREAM.
Changed |= enableGLCBit(MI);
Changed |= enableSLCBit(MI);
return Changed;
@@ -1097,6 +1105,8 @@ bool SIGfx90ACacheControl::enableLoadCacheBypass(
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
+ // Set the L1 cache policy to MISS_LRU.
+ // Note: there is no L2 cache bypass policy at the ISA level.
Changed |= enableGLCBit(MI);
break;
case SIAtomicScope::WORKGROUP:
@@ -1206,6 +1216,9 @@ bool SIGfx90ACacheControl::enableVolatileAndOrNonTemporal(
bool Changed = false;
if (IsVolatile) {
+ // Set L1 cache policy to be MISS_EVICT for load instructions
+ // and MISS_LRU for store instructions.
+ // Note: there is no L2 cache bypass policy at the ISA level.
if (Op == SIMemOp::LOAD)
Changed |= enableGLCBit(MI);
@@ -1221,7 +1234,8 @@ bool SIGfx90ACacheControl::enableVolatileAndOrNonTemporal(
}
if (IsNonTemporal) {
- // Request L1 MISS_EVICT and L2 STREAM for load and store instructions.
+ // Setting both GLC and SLC configures L1 cache policy to MISS_EVICT
+ // for both loads and stores, and the L2 cache policy to STREAM.
Changed |= enableGLCBit(MI);
Changed |= enableSLCBit(MI);
return Changed;
@@ -1380,12 +1394,11 @@ bool SIGfx10CacheControl::enableLoadCacheBypass(
bool Changed = false;
if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
- /// TODO Do not set glc for rmw atomic operations as they
- /// implicitly bypass the L0/L1 caches.
-
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
+ // Set the L0 and L1 cache policies to MISS_EVICT.
+ // Note: there is no L2 cache coherent bypass control at the ISA level.
Changed |= enableGLCBit(MI);
Changed |= enableDLCBit(MI);
break;
@@ -1434,6 +1447,9 @@ bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal(
bool Changed = false;
if (IsVolatile) {
+ // Set L0 and L1 cache policy to be MISS_EVICT for load instructions
+ // and MISS_LRU for store instructions.
+ // Note: there is no L2 cache coherent bypass control at the ISA level.
if (Op == SIMemOp::LOAD) {
Changed |= enableGLCBit(MI);
Changed |= enableDLCBit(MI);
@@ -1450,8 +1466,14 @@ bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal(
}
if (IsNonTemporal) {
- // Request L0/L1 HIT_EVICT and L2 STREAM for load and store instructions.
+ // For loads setting SLC configures L0 and L1 cache policy to HIT_EVICT
+ // and L2 cache policy to STREAM.
+ // For stores setting both GLC and SLC configures L0 and L1 cache policy
+ // to MISS_EVICT and the L2 cache policy to STREAM.
+ if (Op == SIMemOp::STORE)
+ Changed |= enableGLCBit(MI);
Changed |= enableSLCBit(MI);
+
return Changed;
}
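The comments added throughout this file describe how the cache-policy bits combine. On gfx10, volatile loads set GLC+DLC (L0/L1 MISS_EVICT), non-temporal loads set SLC (L0/L1 HIT_EVICT, L2 STREAM), and non-temporal stores set GLC+SLC (L0/L1 MISS_EVICT, L2 STREAM). A hedged standalone sketch that merely encodes that decision table; the struct and function names are illustrative, not the pass's real API:

struct CachePolicyBits { bool GLC = false, SLC = false, DLC = false; };

// Illustrative gfx10 policy selection, paraphrasing the comments above.
static CachePolicyBits selectGfx10Bits(bool IsVolatile, bool IsNonTemporal,
                                       bool IsLoad) {
  CachePolicyBits Bits;
  if (IsVolatile && IsLoad) {   // L0/L1 MISS_EVICT for volatile loads
    Bits.GLC = true;
    Bits.DLC = true;
  }
  if (IsNonTemporal) {          // L2 STREAM for non-temporal accesses
    if (!IsLoad)
      Bits.GLC = true;          // stores additionally get L0/L1 MISS_EVICT
    Bits.SLC = true;
  }
  return Bits;
}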
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index 6a698348d389..da41a5e2478a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -1170,7 +1170,7 @@ void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI,
unsigned I = MI.getOperandNo(&Op);
if (Desc.OpInfo[I].RegClass == -1 ||
- !TRI->hasVGPRs(TRI->getRegClass(Desc.OpInfo[I].RegClass)))
+ !TRI->isVSSuperClass(TRI->getRegClass(Desc.OpInfo[I].RegClass)))
continue;
if (ST.hasSDWAScalar() && ConstantBusCount == 0 && Op.isReg() &&
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index d1b8e217471e..b0e45dd3e3e3 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -291,20 +291,19 @@ bool SIPreEmitPeephole::mustRetainExeczBranch(
MBBI != End && MBBI != ToI; ++MBBI) {
const MachineBasicBlock &MBB = *MBBI;
- for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I) {
+ for (const MachineInstr &MI : MBB) {
// When a uniform loop is inside non-uniform control flow, the branch
// leaving the loop might never be taken when EXEC = 0.
// Hence we should retain cbranch out of the loop lest it become infinite.
- if (I->isConditionalBranch())
+ if (MI.isConditionalBranch())
return true;
- if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
+ if (TII->hasUnwantedEffectsWhenEXECEmpty(MI))
return true;
// These instructions are potentially expensive even if EXEC = 0.
- if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) ||
- TII->isDS(*I) || I->getOpcode() == AMDGPU::S_WAITCNT)
+ if (TII->isSMRD(MI) || TII->isVMEM(MI) || TII->isFLAT(MI) ||
+ TII->isDS(MI) || MI.getOpcode() == AMDGPU::S_WAITCNT)
return true;
++NumInstr;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index bfbe84f696f8..a1d9a23a5084 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -402,6 +402,62 @@ const uint32_t *SIRegisterInfo::getNoPreservedMask() const {
return CSR_AMDGPU_NoRegs_RegMask;
}
+const TargetRegisterClass *
+SIRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const {
+ // FIXME: Should have a helper function like getEquivalentVGPRClass to get the
+ // equivalent AV class. If one were used here, the verifier would crash after
+ // RegBankSelect in the GISel flow, because the aligned register classes are
+ // not fully assigned until instruction selection.
+ if (MF.getSubtarget<GCNSubtarget>().hasMAIInsts() &&
+ (isVGPRClass(RC) || isAGPRClass(RC))) {
+ if (RC == &AMDGPU::VGPR_32RegClass || RC == &AMDGPU::AGPR_32RegClass)
+ return &AMDGPU::AV_32RegClass;
+ if (RC == &AMDGPU::VReg_64RegClass || RC == &AMDGPU::AReg_64RegClass)
+ return &AMDGPU::AV_64RegClass;
+ if (RC == &AMDGPU::VReg_64_Align2RegClass ||
+ RC == &AMDGPU::AReg_64_Align2RegClass)
+ return &AMDGPU::AV_64_Align2RegClass;
+ if (RC == &AMDGPU::VReg_96RegClass || RC == &AMDGPU::AReg_96RegClass)
+ return &AMDGPU::AV_96RegClass;
+ if (RC == &AMDGPU::VReg_96_Align2RegClass ||
+ RC == &AMDGPU::AReg_96_Align2RegClass)
+ return &AMDGPU::AV_96_Align2RegClass;
+ if (RC == &AMDGPU::VReg_128RegClass || RC == &AMDGPU::AReg_128RegClass)
+ return &AMDGPU::AV_128RegClass;
+ if (RC == &AMDGPU::VReg_128_Align2RegClass ||
+ RC == &AMDGPU::AReg_128_Align2RegClass)
+ return &AMDGPU::AV_128_Align2RegClass;
+ if (RC == &AMDGPU::VReg_160RegClass || RC == &AMDGPU::AReg_160RegClass)
+ return &AMDGPU::AV_160RegClass;
+ if (RC == &AMDGPU::VReg_160_Align2RegClass ||
+ RC == &AMDGPU::AReg_160_Align2RegClass)
+ return &AMDGPU::AV_160_Align2RegClass;
+ if (RC == &AMDGPU::VReg_192RegClass || RC == &AMDGPU::AReg_192RegClass)
+ return &AMDGPU::AV_192RegClass;
+ if (RC == &AMDGPU::VReg_192_Align2RegClass ||
+ RC == &AMDGPU::AReg_192_Align2RegClass)
+ return &AMDGPU::AV_192_Align2RegClass;
+ if (RC == &AMDGPU::VReg_256RegClass || RC == &AMDGPU::AReg_256RegClass)
+ return &AMDGPU::AV_256RegClass;
+ if (RC == &AMDGPU::VReg_256_Align2RegClass ||
+ RC == &AMDGPU::AReg_256_Align2RegClass)
+ return &AMDGPU::AV_256_Align2RegClass;
+ if (RC == &AMDGPU::VReg_512RegClass || RC == &AMDGPU::AReg_512RegClass)
+ return &AMDGPU::AV_512RegClass;
+ if (RC == &AMDGPU::VReg_512_Align2RegClass ||
+ RC == &AMDGPU::AReg_512_Align2RegClass)
+ return &AMDGPU::AV_512_Align2RegClass;
+ if (RC == &AMDGPU::VReg_1024RegClass || RC == &AMDGPU::AReg_1024RegClass)
+ return &AMDGPU::AV_1024RegClass;
+ if (RC == &AMDGPU::VReg_1024_Align2RegClass ||
+ RC == &AMDGPU::AReg_1024_Align2RegClass)
+ return &AMDGPU::AV_1024_Align2RegClass;
+ }
+
+ return TargetRegisterInfo::getLargestLegalSuperClass(RC, MF);
+}
+
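The effect of this override is that, on subtargets with MAI instructions, a virtual register constrained to a pure VGPR or pure AGPR class may be inflated to the corresponding AV super-class, letting the allocator pick a register from either bank. A hedged usage sketch (the surrounding function and register are hypothetical; it assumes the usual LLVM CodeGen headers are available):

// Hypothetical caller: widen a vreg's class so regalloc may use VGPRs or AGPRs.
void relaxToAVClass(MachineRegisterInfo &MRI, const SIRegisterInfo &TRI,
                    const MachineFunction &MF, Register VReg) {
  const TargetRegisterClass *RC = MRI.getRegClass(VReg);
  if (const TargetRegisterClass *Super = TRI.getLargestLegalSuperClass(RC, MF))
    if (Super != RC)
      MRI.setRegClass(VReg, Super);   // e.g. VReg_64 -> AV_64
}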
Register SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const SIFrameLowering *TFI =
MF.getSubtarget<GCNSubtarget>().getFrameLowering();
@@ -994,10 +1050,22 @@ static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
unsigned Dst = IsStore ? Reg : ValueReg;
unsigned Src = IsStore ? ValueReg : Reg;
- unsigned Opc = (IsStore ^ TRI->isVGPR(MRI, Reg)) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
- : AMDGPU::V_ACCVGPR_READ_B32_e64;
+ bool IsVGPR = TRI->isVGPR(MRI, Reg);
+ DebugLoc DL = MI->getDebugLoc();
+ if (IsVGPR == TRI->isVGPR(MRI, ValueReg)) {
+ // The spiller during regalloc may restore a spilled register to its
+ // superclass. That can result in AGPR spills being restored to VGPRs or the
+ // other way around, leaving the source and destination with identical
+ // register classes at this point; a plain copy is all that is needed then.
+ auto CopyMIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), Dst)
+ .addReg(Src, getKillRegState(IsKill));
+ CopyMIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
+ return CopyMIB;
+ }
+ unsigned Opc = (IsStore ^ IsVGPR) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
+ : AMDGPU::V_ACCVGPR_READ_B32_e64;
- auto MIB = BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(Opc), Dst)
+ auto MIB = BuildMI(MBB, MI, DL, TII->get(Opc), Dst)
.addReg(Src, getKillRegState(IsKill));
MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
return MIB;
@@ -1099,7 +1167,7 @@ void SIRegisterInfo::buildSpillLoadStore(
const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
// On gfx90a+ AGPR is a regular VGPR acceptable for loads and stores.
- const bool IsAGPR = !ST.hasGFX90AInsts() && hasAGPRs(RC);
+ const bool IsAGPR = !ST.hasGFX90AInsts() && isAGPRClass(RC);
const unsigned RegWidth = AMDGPU::getRegBitWidth(RC->getID()) / 8;
// Always use 4 byte operations for AGPRs because we need to scavenge
@@ -2163,6 +2231,65 @@ SIRegisterInfo::getAGPRClassForBitWidth(unsigned BitWidth) const {
: getAnyAGPRClassForBitWidth(BitWidth);
}
+static const TargetRegisterClass *
+getAnyVectorSuperClassForBitWidth(unsigned BitWidth) {
+ if (BitWidth <= 64)
+ return &AMDGPU::AV_64RegClass;
+ if (BitWidth <= 96)
+ return &AMDGPU::AV_96RegClass;
+ if (BitWidth <= 128)
+ return &AMDGPU::AV_128RegClass;
+ if (BitWidth <= 160)
+ return &AMDGPU::AV_160RegClass;
+ if (BitWidth <= 192)
+ return &AMDGPU::AV_192RegClass;
+ if (BitWidth <= 224)
+ return &AMDGPU::AV_224RegClass;
+ if (BitWidth <= 256)
+ return &AMDGPU::AV_256RegClass;
+ if (BitWidth <= 512)
+ return &AMDGPU::AV_512RegClass;
+ if (BitWidth <= 1024)
+ return &AMDGPU::AV_1024RegClass;
+
+ return nullptr;
+}
+
+static const TargetRegisterClass *
+getAlignedVectorSuperClassForBitWidth(unsigned BitWidth) {
+ if (BitWidth <= 64)
+ return &AMDGPU::AV_64_Align2RegClass;
+ if (BitWidth <= 96)
+ return &AMDGPU::AV_96_Align2RegClass;
+ if (BitWidth <= 128)
+ return &AMDGPU::AV_128_Align2RegClass;
+ if (BitWidth <= 160)
+ return &AMDGPU::AV_160_Align2RegClass;
+ if (BitWidth <= 192)
+ return &AMDGPU::AV_192_Align2RegClass;
+ if (BitWidth <= 224)
+ return &AMDGPU::AV_224_Align2RegClass;
+ if (BitWidth <= 256)
+ return &AMDGPU::AV_256_Align2RegClass;
+ if (BitWidth <= 512)
+ return &AMDGPU::AV_512_Align2RegClass;
+ if (BitWidth <= 1024)
+ return &AMDGPU::AV_1024_Align2RegClass;
+
+ return nullptr;
+}
+
+const TargetRegisterClass *
+SIRegisterInfo::getVectorSuperClassForBitWidth(unsigned BitWidth) const {
+ if (BitWidth <= 16)
+ return &AMDGPU::VGPR_LO16RegClass;
+ if (BitWidth <= 32)
+ return &AMDGPU::AV_32RegClass;
+ return ST.needsAlignedVGPRs()
+ ? getAlignedVectorSuperClassForBitWidth(BitWidth)
+ : getAnyVectorSuperClassForBitWidth(BitWidth);
+}
+
const TargetRegisterClass *
SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) {
if (BitWidth <= 16)
@@ -2305,15 +2432,14 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
// We can assume that each lane corresponds to one 32-bit register.
unsigned Size = getNumChannelsFromSubReg(SubIdx) * 32;
- if (isSGPRClass(RC)) {
- if (Size == 32)
- RC = &AMDGPU::SGPR_32RegClass;
- else
- RC = getSGPRClassForBitWidth(Size);
- } else if (hasAGPRs(RC)) {
+ if (isAGPRClass(RC)) {
RC = getAGPRClassForBitWidth(Size);
- } else {
+ } else if (isVGPRClass(RC)) {
RC = getVGPRClassForBitWidth(Size);
+ } else if (isVectorSuperClass(RC)) {
+ RC = getVectorSuperClassForBitWidth(Size);
+ } else {
+ RC = getSGPRClassForBitWidth(Size);
}
assert(RC && "Invalid sub-register class size");
return RC;
@@ -2626,10 +2752,13 @@ bool SIRegisterInfo::isProperlyAlignedRC(const TargetRegisterClass &RC) const {
if (!ST.needsAlignedVGPRs())
return true;
- if (hasVGPRs(&RC))
+ if (isVGPRClass(&RC))
return RC.hasSuperClassEq(getVGPRClassForBitWidth(getRegSizeInBits(RC)));
- if (hasAGPRs(&RC))
+ if (isAGPRClass(&RC))
return RC.hasSuperClassEq(getAGPRClassForBitWidth(getRegSizeInBits(RC)));
+ if (isVectorSuperClass(&RC))
+ return RC.hasSuperClassEq(
+ getVectorSuperClassForBitWidth(getRegSizeInBits(RC)));
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 8d90ddb1cf4c..f1fe0a1d9329 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -77,6 +77,10 @@ public:
return 100;
}
+ const TargetRegisterClass *
+ getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const override;
+
Register getFrameRegister(const MachineFunction &MF) const override;
bool hasBasePointer(const MachineFunction &MF) const;
@@ -156,6 +160,10 @@ public:
const TargetRegisterClass *getAGPRClassForBitWidth(unsigned BitWidth) const;
LLVM_READONLY
+ const TargetRegisterClass *
+ getVectorSuperClassForBitWidth(unsigned BitWidth) const;
+
+ LLVM_READONLY
static const TargetRegisterClass *getSGPRClassForBitWidth(unsigned BitWidth);
/// Return the 'base' register class for this register.
@@ -164,7 +172,7 @@ public:
/// \returns true if this class contains only SGPR registers
static bool isSGPRClass(const TargetRegisterClass *RC) {
- return !hasVGPRs(RC) && !hasAGPRs(RC);
+ return hasSGPRs(RC) && !hasVGPRs(RC) && !hasAGPRs(RC);
}
/// \returns true if this class ID contains only SGPR registers
@@ -176,12 +184,22 @@ public:
/// \returns true if this class contains only VGPR registers
static bool isVGPRClass(const TargetRegisterClass *RC) {
- return hasVGPRs(RC) && !hasAGPRs(RC);
+ return hasVGPRs(RC) && !hasAGPRs(RC) && !hasSGPRs(RC);
}
/// \returns true if this class contains only AGPR registers
static bool isAGPRClass(const TargetRegisterClass *RC) {
- return hasAGPRs(RC) && !hasVGPRs(RC);
+ return hasAGPRs(RC) && !hasVGPRs(RC) && !hasSGPRs(RC);
+ }
+
+ /// \returns true only if this class contains both VGPR and AGPR registers
+ bool isVectorSuperClass(const TargetRegisterClass *RC) const {
+ return hasVGPRs(RC) && hasAGPRs(RC) && !hasSGPRs(RC);
+ }
+
+ /// \returns true only if this class contains both VGPR and SGPR registers
+ bool isVSSuperClass(const TargetRegisterClass *RC) const {
+ return hasVGPRs(RC) && hasSGPRs(RC) && !hasAGPRs(RC);
}
/// \returns true if this class contains VGPR registers.
@@ -194,6 +212,11 @@ public:
return RC->TSFlags & SIRCFlags::HasAGPR;
}
+ /// \returns true if this class contains SGPR registers.
+ static bool hasSGPRs(const TargetRegisterClass *RC) {
+ return RC->TSFlags & SIRCFlags::HasSGPR;
+ }
+
/// \returns true if this class contains any vector registers.
static bool hasVectorRegisters(const TargetRegisterClass *RC) {
return hasVGPRs(RC) || hasAGPRs(RC);
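With the new HasSGPR flag, every register class carries three TSFlags bits, and the predicates above become simple bit tests: a class is a pure SGPR/VGPR/AGPR class only if exactly its own bit is set, an AV super-class sets both vector bits, and a VS super-class sets the VGPR and SGPR bits. A tiny standalone illustration of that encoding; the flag values are illustrative stand-ins for SIRCFlags:

#include <cstdint>

enum : uint8_t { HasVGPR = 1 << 0, HasAGPR = 1 << 1, HasSGPR = 1 << 2 };

static bool isPureVGPRClass(uint8_t Flags) { return Flags == HasVGPR; }
static bool isPureAGPRClass(uint8_t Flags) { return Flags == HasAGPR; }
static bool isPureSGPRClass(uint8_t Flags) { return Flags == HasSGPR; }
static bool isAVSuperClass(uint8_t Flags) {
  return Flags == (HasVGPR | HasAGPR);          // e.g. AV_64
}
static bool isVSSuperClass(uint8_t Flags) {
  return Flags == (HasVGPR | HasSGPR);          // e.g. VS_32, VS_64
}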
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index cf1d90484228..340e2b48e5cd 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -133,9 +133,13 @@ class SIRegisterClass <string n, list<ValueType> rTypes, int Align, dag rList>
field bit HasVGPR = 0;
field bit HasAGPR = 0;
+ // For scalar register classes.
+ field bit HasSGPR = 0;
+
// These need to be kept in sync with the enum SIRCFlags.
let TSFlags{0} = HasVGPR;
let TSFlags{1} = HasAGPR;
+ let TSFlags{2} = HasSGPR;
}
multiclass SIRegLoHi16 <string n, bits<16> regIdx, bit ArtificialHigh = 1,
@@ -307,45 +311,51 @@ foreach Index = 0...255 in {
// Groupings using register classes and tuples
//===----------------------------------------------------------------------===//
-def SCC_CLASS : RegisterClass<"AMDGPU", [i1], 1, (add SCC)> {
+def SCC_CLASS : SIRegisterClass<"AMDGPU", [i1], 1, (add SCC)> {
let CopyCost = -1;
let isAllocatable = 0;
+ let HasSGPR = 1;
}
-def M0_CLASS : RegisterClass<"AMDGPU", [i32], 32, (add M0)> {
+def M0_CLASS : SIRegisterClass<"AMDGPU", [i32], 32, (add M0)> {
let CopyCost = 1;
let isAllocatable = 0;
+ let HasSGPR = 1;
}
-def M0_CLASS_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16, (add M0_LO16)> {
+def M0_CLASS_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16, (add M0_LO16)> {
let CopyCost = 1;
let Size = 16;
let isAllocatable = 0;
+ let HasSGPR = 1;
}
// TODO: Do we need to set DwarfRegAlias on register tuples?
-def SGPR_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16,
+def SGPR_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
(add (sequence "SGPR%u_LO16", 0, 105))> {
let AllocationPriority = 9;
let Size = 16;
let GeneratePressureSet = 0;
+ let HasSGPR = 1;
}
-def SGPR_HI16 : RegisterClass<"AMDGPU", [i16, f16], 16,
+def SGPR_HI16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
(add (sequence "SGPR%u_HI16", 0, 105))> {
let isAllocatable = 0;
let Size = 16;
let GeneratePressureSet = 0;
+ let HasSGPR = 1;
}
// SGPR 32-bit registers
-def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+def SGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add (sequence "SGPR%u", 0, 105))> {
// Give all SGPR classes higher priority than VGPR classes, because
// we want to spill SGPRs to VGPRs.
let AllocationPriority = 9;
let GeneratePressureSet = 0;
+ let HasSGPR = 1;
}
// SGPR 64-bit registers
@@ -376,16 +386,18 @@ def SGPR_512Regs : SIRegisterTuples<getSubRegs<16>.ret, SGPR_32, 105, 4, 16, "s"
def SGPR_1024Regs : SIRegisterTuples<getSubRegs<32>.ret, SGPR_32, 105, 4, 32, "s">;
// Trap handler TMP 32-bit registers
-def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32,
+def TTMP_32 : SIRegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32,
(add (sequence "TTMP%u", 0, 15))> {
let isAllocatable = 0;
+ let HasSGPR = 1;
}
// Trap handler TMP 16-bit registers
-def TTMP_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16,
+def TTMP_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
(add (sequence "TTMP%u_LO16", 0, 15))> {
let Size = 16;
let isAllocatable = 0;
+ let HasSGPR = 1;
}
// Trap handler TMP 64-bit registers
@@ -598,16 +610,18 @@ def AGPR_1024 : SIRegisterTuples<getSubRegs<32>.ret, AGPR_32, 255, 1, 32, "a">;
// Register classes used as source and destination
//===----------------------------------------------------------------------===//
-def Pseudo_SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+def Pseudo_SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add FP_REG, SP_REG)> {
let isAllocatable = 0;
let CopyCost = -1;
+ let HasSGPR = 1;
}
-def Pseudo_SReg_128 : RegisterClass<"AMDGPU", [v4i32, v2i64, v2f64], 32,
+def Pseudo_SReg_128 : SIRegisterClass<"AMDGPU", [v4i32, v2i64, v2f64], 32,
(add PRIVATE_RSRC_REG)> {
let isAllocatable = 0;
let CopyCost = -1;
+ let HasSGPR = 1;
}
def LDS_DIRECT_CLASS : RegisterClass<"AMDGPU", [i32], 32,
@@ -616,10 +630,10 @@ def LDS_DIRECT_CLASS : RegisterClass<"AMDGPU", [i32], 32,
let CopyCost = -1;
}
-let GeneratePressureSet = 0 in {
+let GeneratePressureSet = 0, HasSGPR = 1 in {
// Subset of SReg_32 without M0 for SMRD instructions and alike.
// See comments in SIInstructions.td for more info.
-def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
+def SReg_32_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
(add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI,
SGPR_NULL, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT,
SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, SRC_POPS_EXITING_WAVE_ID,
@@ -627,7 +641,7 @@ def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f1
let AllocationPriority = 10;
}
-def SReg_LO16_XM0_XEXEC : RegisterClass<"AMDGPU", [i16, f16], 16,
+def SReg_LO16_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i16, f16], 16,
(add SGPR_LO16, VCC_LO_LO16, VCC_HI_LO16, FLAT_SCR_LO_LO16, FLAT_SCR_HI_LO16,
XNACK_MASK_LO_LO16, XNACK_MASK_HI_LO16, SGPR_NULL_LO16, TTMP_LO16, TMA_LO_LO16,
TMA_HI_LO16, TBA_LO_LO16, TBA_HI_LO16, SRC_SHARED_BASE_LO16,
@@ -637,29 +651,29 @@ def SReg_LO16_XM0_XEXEC : RegisterClass<"AMDGPU", [i16, f16], 16,
let AllocationPriority = 10;
}
-def SReg_32_XEXEC_HI : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
+def SReg_32_XEXEC_HI : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
(add SReg_32_XM0_XEXEC, EXEC_LO, M0_CLASS)> {
let AllocationPriority = 10;
}
-def SReg_LO16_XEXEC_HI : RegisterClass<"AMDGPU", [i16, f16], 16,
+def SReg_LO16_XEXEC_HI : SIRegisterClass<"AMDGPU", [i16, f16], 16,
(add SReg_LO16_XM0_XEXEC, EXEC_LO_LO16, M0_CLASS_LO16)> {
let Size = 16;
let AllocationPriority = 10;
}
-def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
+def SReg_32_XM0 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
(add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> {
let AllocationPriority = 10;
}
-def SReg_LO16_XM0 : RegisterClass<"AMDGPU", [i16, f16], 16,
+def SReg_LO16_XM0 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
(add SReg_LO16_XM0_XEXEC, EXEC_LO_LO16, EXEC_HI_LO16)> {
let Size = 16;
let AllocationPriority = 10;
}
-def SReg_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16,
+def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
(add SGPR_LO16, SReg_LO16_XM0, M0_CLASS_LO16, EXEC_LO_LO16, EXEC_HI_LO16, SReg_LO16_XEXEC_HI)> {
let Size = 16;
let AllocationPriority = 10;
@@ -667,65 +681,75 @@ def SReg_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16,
} // End GeneratePressureSet = 0
// Register class for all scalar registers (SGPRs + Special Registers)
-def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
+def SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
(add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI)> {
let AllocationPriority = 10;
+ let HasSGPR = 1;
}
let GeneratePressureSet = 0 in {
-def SRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+def SRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add SReg_32, LDS_DIRECT_CLASS)> {
let isAllocatable = 0;
+ let HasSGPR = 1;
}
-def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32,
+def SGPR_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32,
(add SGPR_64Regs)> {
let CopyCost = 1;
let AllocationPriority = 11;
+ let HasSGPR = 1;
}
// CCR (call clobbered registers) SGPR 64-bit registers
-def CCR_SGPR_64 : RegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
+def CCR_SGPR_64 : SIRegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
(add (trunc SGPR_64, 16))> {
let CopyCost = SGPR_64.CopyCost;
let AllocationPriority = SGPR_64.AllocationPriority;
+ let HasSGPR = 1;
}
// Call clobbered 64-bit SGPRs for AMDGPU_Gfx CC
-def Gfx_CCR_SGPR_64 : RegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
+def Gfx_CCR_SGPR_64 : SIRegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
(add (trunc (shl SGPR_64, 15), 1), // s[30:31]
(trunc (shl SGPR_64, 18), 14))> { // s[36:37]-s[s62:63]
let CopyCost = SGPR_64.CopyCost;
let AllocationPriority = SGPR_64.AllocationPriority;
+ let HasSGPR = 1;
}
-def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32,
+def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32,
(add TTMP_64Regs)> {
let isAllocatable = 0;
+ let HasSGPR = 1;
}
-def SReg_64_XEXEC : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
+def SReg_64_XEXEC : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
(add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA)> {
let CopyCost = 1;
let AllocationPriority = 13;
+ let HasSGPR = 1;
}
-def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
+def SReg_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
(add SReg_64_XEXEC, EXEC)> {
let CopyCost = 1;
let AllocationPriority = 13;
+ let HasSGPR = 1;
}
-def SReg_1_XEXEC : RegisterClass<"AMDGPU", [i1], 32,
+def SReg_1_XEXEC : SIRegisterClass<"AMDGPU", [i1], 32,
(add SReg_64_XEXEC, SReg_32_XM0_XEXEC)> {
let CopyCost = 1;
let isAllocatable = 0;
+ let HasSGPR = 1;
}
-def SReg_1 : RegisterClass<"AMDGPU", [i1], 32,
+def SReg_1 : SIRegisterClass<"AMDGPU", [i1], 32,
(add SReg_1_XEXEC, EXEC, EXEC_LO)> {
let CopyCost = 1;
let isAllocatable = 0;
+ let HasSGPR = 1;
}
multiclass SRegClass<int numRegs, int priority,
@@ -738,18 +762,18 @@ multiclass SRegClass<int numRegs, int priority,
defvar sgprName = !strconcat("SGPR_", suffix);
defvar ttmpName = !strconcat("TTMP_", suffix);
- let AllocationPriority = priority, CopyCost = copyCost in {
- def "" # sgprName : RegisterClass<"AMDGPU", regTypes, 32, (add regList)> {
+ let AllocationPriority = priority, CopyCost = copyCost, HasSGPR = 1 in {
+ def "" # sgprName : SIRegisterClass<"AMDGPU", regTypes, 32, (add regList)> {
}
if hasTTMP then {
- def "" # ttmpName : RegisterClass<"AMDGPU", regTypes, 32, (add ttmpList)> {
+ def "" # ttmpName : SIRegisterClass<"AMDGPU", regTypes, 32, (add ttmpList)> {
let isAllocatable = 0;
}
}
def SReg_ # suffix :
- RegisterClass<"AMDGPU", regTypes, 32,
+ SIRegisterClass<"AMDGPU", regTypes, 32,
!con(!dag(add, [!cast<RegisterClass>(sgprName)], ["sgpr"]),
!if(hasTTMP,
!dag(add, [!cast<RegisterClass>(ttmpName)], ["ttmp"]),
@@ -855,44 +879,45 @@ def VS_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add VGPR_32, SReg_32, LDS_DIRECT_CLASS)> {
let isAllocatable = 0;
let HasVGPR = 1;
+ let HasSGPR = 1;
}
def VS_64 : SIRegisterClass<"AMDGPU", [i64, f64, v2f32], 32, (add VReg_64, SReg_64)> {
let isAllocatable = 0;
let HasVGPR = 1;
+ let HasSGPR = 1;
}
-def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32,
- (add AGPR_32, VGPR_32)> {
- let isAllocatable = 0;
- let HasVGPR = 1;
- let HasAGPR = 1;
-}
-
-def AV_64 : SIRegisterClass<"AMDGPU", VReg_64.RegTypes, 32,
- (add AReg_64, VReg_64)> {
- let isAllocatable = 0;
+def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32, (add VGPR_32, AGPR_32)> {
let HasVGPR = 1;
let HasAGPR = 1;
}
} // End GeneratePressureSet = 0
-let HasVGPR = 1, HasAGPR = 1 in {
-def AV_96 : SIRegisterClass<"AMDGPU", VReg_96.RegTypes, 32,
- (add AReg_96, VReg_96)> {
- let isAllocatable = 0;
-}
+// Define a register tuple class, along with a variant requiring an
+// even-aligned base register.
+multiclass AVRegClass<int numRegs, list<ValueType> regTypes,
+ dag vregList, dag aregList> {
+ let HasVGPR = 1, HasAGPR = 1 in {
+ // Define the regular class.
+ def "" : VRegClassBase<numRegs, regTypes, (add vregList, aregList)>;
-def AV_128 : SIRegisterClass<"AMDGPU", VReg_128.RegTypes, 32,
- (add AReg_128, VReg_128)> {
- let isAllocatable = 0;
+ // Define 2-aligned variant
+ def _Align2 : VRegClassBase<numRegs, regTypes,
+ (add (decimate vregList, 2),
+ (decimate aregList, 2))>;
+ }
}
-def AV_160 : SIRegisterClass<"AMDGPU", VReg_160.RegTypes, 32,
- (add AReg_160, VReg_160)> {
- let isAllocatable = 0;
-}
-} // End HasVGPR = 1, HasAGPR = 1
+defm AV_64 : AVRegClass<2, VReg_64.RegTypes, (add VGPR_64), (add AGPR_64)>;
+defm AV_96 : AVRegClass<3, VReg_96.RegTypes, (add VGPR_96), (add AGPR_96)>;
+defm AV_128 : AVRegClass<4, VReg_128.RegTypes, (add VGPR_128), (add AGPR_128)>;
+defm AV_160 : AVRegClass<5, VReg_160.RegTypes, (add VGPR_160), (add AGPR_160)>;
+defm AV_192 : AVRegClass<6, VReg_160.RegTypes, (add VGPR_192), (add AGPR_192)>;
+defm AV_224 : AVRegClass<7, VReg_160.RegTypes, (add VGPR_224), (add AGPR_224)>;
+defm AV_256 : AVRegClass<8, VReg_160.RegTypes, (add VGPR_256), (add AGPR_256)>;
+defm AV_512 : AVRegClass<16, VReg_160.RegTypes, (add VGPR_512), (add AGPR_512)>;
+defm AV_1024 : AVRegClass<32, VReg_160.RegTypes, (add VGPR_1024), (add AGPR_1024)>;
//===----------------------------------------------------------------------===//
// Register operands
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SISchedule.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SISchedule.td
index 0792b303b830..18d424a3bc9f 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SISchedule.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SISchedule.td
@@ -93,16 +93,16 @@ def HWBranch : ProcResource<1> {
let BufferSize = 1;
}
def HWExport : ProcResource<1> {
- let BufferSize = 7; // Taken from S_WAITCNT
+ let BufferSize = 1;
}
def HWLGKM : ProcResource<1> {
- let BufferSize = 31; // Taken from S_WAITCNT
+ let BufferSize = 1;
}
def HWSALU : ProcResource<1> {
let BufferSize = 1;
}
def HWVMEM : ProcResource<1> {
- let BufferSize = 15; // Taken from S_WAITCNT
+ let BufferSize = 1;
}
def HWVALU : ProcResource<1> {
let BufferSize = 1;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 6f63f686635a..46012e5d7d97 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -487,6 +487,8 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
bool WQMOutputs = MF.getFunction().hasFnAttribute("amdgpu-ps-wqm-outputs");
SmallVector<MachineInstr *, 4> SetInactiveInstrs;
SmallVector<MachineInstr *, 4> SoftWQMInstrs;
+ bool HasImplicitDerivatives =
+ MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS;
// We need to visit the basic blocks in reverse post-order so that we visit
// defs before uses, in particular so that we don't accidentally mark an
@@ -497,8 +499,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
MachineBasicBlock &MBB = **BI;
BlockInfo &BBI = Blocks[&MBB];
- for (auto II = MBB.begin(), IE = MBB.end(); II != IE; ++II) {
- MachineInstr &MI = *II;
+ for (MachineInstr &MI : MBB) {
InstrInfo &III = Instructions[&MI];
unsigned Opcode = MI.getOpcode();
char Flags = 0;
@@ -507,6 +508,11 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
// If LOD is not supported WQM is not needed.
if (!ST->hasExtendedImageInsts())
continue;
+ // Only generate implicit WQM if implicit derivatives are required.
+ // This avoids inserting unintended WQM if a shader type without
+ // implicit derivatives uses an image sampling instruction.
+ if (!HasImplicitDerivatives)
+ continue;
// Sampling instructions don't need to produce results for all pixels
// in a quad, they just require all inputs of a quad to have been
// computed for derivatives.
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 9da7b9f5145d..d20eaaaa65e8 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1626,13 +1626,14 @@ unsigned getRegBitWidth(unsigned RCID) {
return 32;
case AMDGPU::SGPR_64RegClassID:
case AMDGPU::VS_64RegClassID:
- case AMDGPU::AV_64RegClassID:
case AMDGPU::SReg_64RegClassID:
case AMDGPU::VReg_64RegClassID:
case AMDGPU::AReg_64RegClassID:
case AMDGPU::SReg_64_XEXECRegClassID:
case AMDGPU::VReg_64_Align2RegClassID:
case AMDGPU::AReg_64_Align2RegClassID:
+ case AMDGPU::AV_64RegClassID:
+ case AMDGPU::AV_64_Align2RegClassID:
return 64;
case AMDGPU::SGPR_96RegClassID:
case AMDGPU::SReg_96RegClassID:
@@ -1641,6 +1642,7 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::VReg_96_Align2RegClassID:
case AMDGPU::AReg_96_Align2RegClassID:
case AMDGPU::AV_96RegClassID:
+ case AMDGPU::AV_96_Align2RegClassID:
return 96;
case AMDGPU::SGPR_128RegClassID:
case AMDGPU::SReg_128RegClassID:
@@ -1649,6 +1651,7 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::VReg_128_Align2RegClassID:
case AMDGPU::AReg_128_Align2RegClassID:
case AMDGPU::AV_128RegClassID:
+ case AMDGPU::AV_128_Align2RegClassID:
return 128;
case AMDGPU::SGPR_160RegClassID:
case AMDGPU::SReg_160RegClassID:
@@ -1657,6 +1660,7 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::VReg_160_Align2RegClassID:
case AMDGPU::AReg_160_Align2RegClassID:
case AMDGPU::AV_160RegClassID:
+ case AMDGPU::AV_160_Align2RegClassID:
return 160;
case AMDGPU::SGPR_192RegClassID:
case AMDGPU::SReg_192RegClassID:
@@ -1664,6 +1668,8 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AReg_192RegClassID:
case AMDGPU::VReg_192_Align2RegClassID:
case AMDGPU::AReg_192_Align2RegClassID:
+ case AMDGPU::AV_192RegClassID:
+ case AMDGPU::AV_192_Align2RegClassID:
return 192;
case AMDGPU::SGPR_224RegClassID:
case AMDGPU::SReg_224RegClassID:
@@ -1671,6 +1677,8 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AReg_224RegClassID:
case AMDGPU::VReg_224_Align2RegClassID:
case AMDGPU::AReg_224_Align2RegClassID:
+ case AMDGPU::AV_224RegClassID:
+ case AMDGPU::AV_224_Align2RegClassID:
return 224;
case AMDGPU::SGPR_256RegClassID:
case AMDGPU::SReg_256RegClassID:
@@ -1678,6 +1686,8 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AReg_256RegClassID:
case AMDGPU::VReg_256_Align2RegClassID:
case AMDGPU::AReg_256_Align2RegClassID:
+ case AMDGPU::AV_256RegClassID:
+ case AMDGPU::AV_256_Align2RegClassID:
return 256;
case AMDGPU::SGPR_512RegClassID:
case AMDGPU::SReg_512RegClassID:
@@ -1685,6 +1695,8 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AReg_512RegClassID:
case AMDGPU::VReg_512_Align2RegClassID:
case AMDGPU::AReg_512_Align2RegClassID:
+ case AMDGPU::AV_512RegClassID:
+ case AMDGPU::AV_512_Align2RegClassID:
return 512;
case AMDGPU::SGPR_1024RegClassID:
case AMDGPU::SReg_1024RegClassID:
@@ -1692,6 +1704,8 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AReg_1024RegClassID:
case AMDGPU::VReg_1024_Align2RegClassID:
case AMDGPU::AReg_1024_Align2RegClassID:
+ case AMDGPU::AV_1024RegClassID:
+ case AMDGPU::AV_1024_Align2RegClassID:
return 1024;
default:
llvm_unreachable("Unexpected register class");
diff --git a/contrib/llvm-project/llvm/lib/Target/ARC/ARCMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/ARC/ARCMCInstLower.cpp
index 62462b77eccf..50ba9fe75232 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARC/ARCMCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARC/ARCMCInstLower.cpp
@@ -104,8 +104,7 @@ MCOperand ARCMCInstLower::LowerOperand(const MachineOperand &MO,
void ARCMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
MCOperand MCOp = LowerOperand(MO);
if (MCOp.isValid())
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARM.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARM.h
index 5500783f74db..1d5e45aec06c 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARM.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARM.h
@@ -44,6 +44,7 @@ FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
FunctionPass *createA15SDOptimizerPass();
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMExpandPseudoPass();
+FunctionPass *createARMBranchTargetsPass();
FunctionPass *createARMConstantIslandPass();
FunctionPass *createMLxExpansionPass();
FunctionPass *createThumb2ITBlockPass();
@@ -66,6 +67,7 @@ void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
void initializeARMParallelDSPPass(PassRegistry &);
void initializeARMLoadStoreOptPass(PassRegistry &);
void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
+void initializeARMBranchTargetsPass(PassRegistry &);
void initializeARMConstantIslandsPass(PassRegistry &);
void initializeARMExpandPseudoPass(PassRegistry &);
void initializeThumb2SizeReducePass(PassRegistry &);
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARM.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARM.td
index 8cbd80f1bf65..e03dd597eb65 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARM.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARM.td
@@ -442,6 +442,10 @@ def FeatureFixCMSE_CVE_2021_35465 : SubtargetFeature<"fix-cmse-cve-2021-35465",
"Mitigate against the cve-2021-35465 "
"security vulnurability">;
+def FeaturePACBTI : SubtargetFeature<"pacbti", "HasPACBTI", "true",
+ "Enable Pointer Authentication and Branch "
+ "Target Identification">;
+
//===----------------------------------------------------------------------===//
// ARM architecture class
//
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 9901b86b0e87..6a88ac485e69 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -763,6 +763,32 @@ void ARMAsmPrinter::emitAttributes() {
int EnumBuildAttr = EnumWidth == 1 ? 1 : 2;
ATS.emitAttribute(ARMBuildAttrs::ABI_enum_size, EnumBuildAttr);
}
+
+ auto *PACValue = mdconst::extract_or_null<ConstantInt>(
+ SourceModule->getModuleFlag("sign-return-address"));
+ if (PACValue && PACValue->getZExtValue() == 1) {
+ // If "+pacbti" is used as an architecture extension,
+ // Tag_PAC_extension is emitted in
+ // ARMTargetStreamer::emitTargetAttributes().
+ if (!STI.hasPACBTI()) {
+ ATS.emitAttribute(ARMBuildAttrs::PAC_extension,
+ ARMBuildAttrs::AllowPACInNOPSpace);
+ }
+ ATS.emitAttribute(ARMBuildAttrs::PACRET_use, ARMBuildAttrs::PACRETUsed);
+ }
+
+ auto *BTIValue = mdconst::extract_or_null<ConstantInt>(
+ SourceModule->getModuleFlag("branch-target-enforcement"));
+ if (BTIValue && BTIValue->getZExtValue() == 1) {
+ // If "+pacbti" is used as an architecture extension,
+ // Tag_BTI_extension is emitted in
+ // ARMTargetStreamer::emitTargetAttributes().
+ if (!STI.hasPACBTI()) {
+ ATS.emitAttribute(ARMBuildAttrs::BTI_extension,
+ ARMBuildAttrs::AllowBTIInNOPSpace);
+ }
+ ATS.emitAttribute(ARMBuildAttrs::BTI_use, ARMBuildAttrs::BTIUsed);
+ }
}
}
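For readers skimming the hunk above: the new attribute emission is driven purely by the two module flags and by whether the subtarget already carries +pacbti (in which case the extension tags are emitted elsewhere). A minimal, standard-C++-only sketch of that decision; AttrDecision and decidePACAttrs are illustrative names, not LLVM API:

#include <cstdio>

struct AttrDecision {
  bool EmitPACInNOPSpace; // would become Tag_PAC_extension = AllowPACInNOPSpace
  bool EmitPACRETUsed;    // would become Tag_PACRET_use = PACRETUsed
};

static AttrDecision decidePACAttrs(bool SignReturnAddress,
                                   bool SubtargetHasPACBTI) {
  AttrDecision D{false, false};
  if (SignReturnAddress) {
    // With "+pacbti" the extension tag is emitted by the target streamer,
    // so only the "PAC in NOP space" form is added here.
    D.EmitPACInNOPSpace = !SubtargetHasPACBTI;
    D.EmitPACRETUsed = true;
  }
  return D;
}

int main() {
  AttrDecision D = decidePACAttrs(/*SignReturnAddress=*/true,
                                  /*SubtargetHasPACBTI=*/false);
  std::printf("PAC in NOP space: %d, PACRET used: %d\n",
              D.EmitPACInNOPSpace, D.EmitPACRETUsed);
  return 0;
}

The BTI half of the hunk follows the same shape with the "branch-target-enforcement" flag and the BTI_extension/BTI_use tags.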
@@ -1535,17 +1561,17 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
MCInst.addExpr(BranchTarget);
}
- if (Opc == ARM::t2BFic) {
- const MCExpr *ElseLabel = MCSymbolRefExpr::create(
- getBFLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(),
- MI->getOperand(2).getIndex(), OutContext),
- OutContext);
- MCInst.addExpr(ElseLabel);
- MCInst.addImm(MI->getOperand(3).getImm());
- } else {
- MCInst.addImm(MI->getOperand(2).getImm())
- .addReg(MI->getOperand(3).getReg());
- }
+ if (Opc == ARM::t2BFic) {
+ const MCExpr *ElseLabel = MCSymbolRefExpr::create(
+ getBFLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(),
+ MI->getOperand(2).getIndex(), OutContext),
+ OutContext);
+ MCInst.addExpr(ElseLabel);
+ MCInst.addImm(MI->getOperand(3).getImm());
+ } else {
+ MCInst.addImm(MI->getOperand(2).getImm())
+ .addReg(MI->getOperand(3).getReg());
+ }
EmitToStreamer(*OutStreamer, MCInst);
return;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 2d981be4cfc1..2a12947d24a8 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -310,8 +310,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
// Transfer LiveVariables states, kill / dead info.
if (LV) {
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) {
Register Reg = MO.getReg();
@@ -634,8 +633,7 @@ bool ARMBaseInstrInfo::ClobbersPredicate(MachineInstr &MI,
std::vector<MachineOperand> &Pred,
bool SkipDead) const {
bool Found = false;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
bool ClobbersCPSR = MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR);
bool IsCPSR = MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR;
if (ClobbersCPSR || IsCPSR) {
@@ -732,8 +730,7 @@ bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
namespace llvm {
template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || MO.isUndef() || MO.isUse())
continue;
if (MO.getReg() != ARM::CPSR)
@@ -1860,15 +1857,11 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
const MachineInstr &MI1,
const MachineRegisterInfo *MRI) const {
unsigned Opcode = MI0.getOpcode();
- if (Opcode == ARM::t2LDRpci ||
- Opcode == ARM::t2LDRpci_pic ||
- Opcode == ARM::tLDRpci ||
- Opcode == ARM::tLDRpci_pic ||
- Opcode == ARM::LDRLIT_ga_pcrel ||
- Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
- Opcode == ARM::tLDRLIT_ga_pcrel ||
- Opcode == ARM::MOV_ga_pcrel ||
- Opcode == ARM::MOV_ga_pcrel_ldr ||
+ if (Opcode == ARM::t2LDRpci || Opcode == ARM::t2LDRpci_pic ||
+ Opcode == ARM::tLDRpci || Opcode == ARM::tLDRpci_pic ||
+ Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
+ Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
+ Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
Opcode == ARM::t2MOV_ga_pcrel) {
if (MI1.getOpcode() != Opcode)
return false;
@@ -1880,11 +1873,9 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
if (MO0.getOffset() != MO1.getOffset())
return false;
- if (Opcode == ARM::LDRLIT_ga_pcrel ||
- Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
- Opcode == ARM::tLDRLIT_ga_pcrel ||
- Opcode == ARM::MOV_ga_pcrel ||
- Opcode == ARM::MOV_ga_pcrel_ldr ||
+ if (Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
+ Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
+ Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
Opcode == ARM::t2MOV_ga_pcrel)
// Ignore the PC labels.
return MO0.getGlobal() == MO1.getGlobal();
@@ -2312,8 +2303,7 @@ ARMBaseInstrInfo::canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI,
return nullptr;
// Check if MI has any non-dead defs or physreg uses. This also detects
// predicated instructions which will be reading CPSR.
- for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 1)) {
// Reject frame index operands, PEI can't handle the predicated pseudos.
if (MO.isFI() || MO.isCPI() || MO.isJTI())
return nullptr;
@@ -4857,11 +4847,10 @@ bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
if (MI.getOpcode() == ARM::tPUSH ||
MI.getOpcode() == ARM::tPOP ||
MI.getOpcode() == ARM::tPOP_RET) {
- for (int i = 2, e = MI.getNumOperands(); i < e; ++i) {
- if (MI.getOperand(i).isImplicit() ||
- !MI.getOperand(i).isReg())
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), 2)) {
+ if (MO.isImplicit() || !MO.isReg())
continue;
- Register Reg = MI.getOperand(i).getReg();
+ Register Reg = MO.getReg();
if (Reg < ARM::R0 || Reg > ARM::R7) {
if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) &&
!(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) {
@@ -5748,17 +5737,17 @@ enum MachineOutlinerMBBFlags {
};
struct OutlinerCosts {
- const int CallTailCall;
- const int FrameTailCall;
- const int CallThunk;
- const int FrameThunk;
- const int CallNoLRSave;
- const int FrameNoLRSave;
- const int CallRegSave;
- const int FrameRegSave;
- const int CallDefault;
- const int FrameDefault;
- const int SaveRestoreLROnStack;
+ int CallTailCall;
+ int FrameTailCall;
+ int CallThunk;
+ int FrameThunk;
+ int CallNoLRSave;
+ int FrameNoLRSave;
+ int CallRegSave;
+ int FrameRegSave;
+ int CallDefault;
+ int FrameDefault;
+ int SaveRestoreLROnStack;
OutlinerCosts(const ARMSubtarget &target)
: CallTailCall(target.isThumb() ? 4 : 4),
@@ -5879,6 +5868,24 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
return outliner::OutlinedFunction();
}
+ // Partition the candidates into two sets: one with BTI enabled and one
+ // with BTI disabled. Remove the candidates from the smaller set. We expect
+ // the majority of the candidates to agree on branch target enforcement,
+ // with just a few oddballs; if the two sets are the same size, prefer the
+ // non-BTI ones for outlining, since they have less overhead.
+ auto NoBTI =
+ llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) {
+ const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
+ return AFI.branchTargetEnforcement();
+ });
+ if (std::distance(RepeatedSequenceLocs.begin(), NoBTI) >
+ std::distance(NoBTI, RepeatedSequenceLocs.end()))
+ RepeatedSequenceLocs.erase(NoBTI, RepeatedSequenceLocs.end());
+ else
+ RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoBTI);
+ if (RepeatedSequenceLocs.size() < 2)
+ return outliner::OutlinedFunction();
+
// At this point, we have only "safe" candidates to outline. Figure out
// frame + call instruction information.
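The partition step above can be read in isolation. A self-contained sketch with standard C++ and a stand-in Candidate struct (names are illustrative; llvm::partition has the same semantics as the std::partition used here): the larger subset survives, and a tie keeps the cheaper non-BTI candidates.

#include <algorithm>
#include <cassert>
#include <iterator>
#include <vector>

struct Candidate { bool BranchTargetEnforcement; };

static void keepMajoritySubset(std::vector<Candidate> &Cands) {
  // BTI candidates are moved to the front; NoBTI points at the first non-BTI one.
  auto NoBTI = std::partition(Cands.begin(), Cands.end(),
                              [](const Candidate &C) {
                                return C.BranchTargetEnforcement;
                              });
  if (std::distance(Cands.begin(), NoBTI) > std::distance(NoBTI, Cands.end()))
    Cands.erase(NoBTI, Cands.end());   // BTI majority: drop the non-BTI ones.
  else
    Cands.erase(Cands.begin(), NoBTI); // Otherwise (including ties) drop the BTI ones.
}

int main() {
  std::vector<Candidate> Cands{{true}, {false}, {false}, {true}};
  keepMajoritySubset(Cands);
  assert(Cands.size() == 2 && !Cands.front().BranchTargetEnforcement);
  return 0;
}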
@@ -5892,6 +5899,16 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
};
OutlinerCosts Costs(Subtarget);
+ const auto &SomeMFI =
+ *RepeatedSequenceLocs.front().getMF()->getInfo<ARMFunctionInfo>();
+ // Adjust costs to account for the BTI instructions.
+ if (SomeMFI.branchTargetEnforcement()) {
+ Costs.FrameDefault += 4;
+ Costs.FrameNoLRSave += 4;
+ Costs.FrameRegSave += 4;
+ Costs.FrameTailCall += 4;
+ Costs.FrameThunk += 4;
+ }
unsigned FrameID = MachineOutlinerDefault;
unsigned NumBytesToCreateFrame = Costs.FrameDefault;
@@ -6004,16 +6021,18 @@ bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
// Stack might be involved but addressing mode doesn't handle any offset.
// Rq: AddrModeT1_[1|2|4] don't operate on SP
- if (AddrMode == ARMII::AddrMode1 // Arithmetic instructions
- || AddrMode == ARMII::AddrMode4 // Load/Store Multiple
- || AddrMode == ARMII::AddrMode6 // Neon Load/Store Multiple
- || AddrMode == ARMII::AddrModeT2_so // SP can't be used as based register
- || AddrMode == ARMII::AddrModeT2_pc // PCrel access
- || AddrMode == ARMII::AddrMode2 // Used by PRE and POST indexed LD/ST
- || AddrMode == ARMII::AddrModeT2_i7 // v8.1-M MVE
- || AddrMode == ARMII::AddrModeT2_i7s2 // v8.1-M MVE
- || AddrMode == ARMII::AddrModeT2_i7s4 // v8.1-M sys regs VLDR/VSTR
- || AddrMode == ARMII::AddrModeNone)
+ if (AddrMode == ARMII::AddrMode1 || // Arithmetic instructions
+ AddrMode == ARMII::AddrMode4 || // Load/Store Multiple
+ AddrMode == ARMII::AddrMode6 || // Neon Load/Store Multiple
+ AddrMode == ARMII::AddrModeT2_so || // SP can't be used as base register
+ AddrMode == ARMII::AddrModeT2_pc || // PCrel access
+ AddrMode == ARMII::AddrMode2 || // Used by PRE and POST indexed LD/ST
+ AddrMode == ARMII::AddrModeT2_i7 || // v8.1-M MVE
+ AddrMode == ARMII::AddrModeT2_i7s2 || // v8.1-M MVE
+ AddrMode == ARMII::AddrModeT2_i7s4 || // v8.1-M sys regs VLDR/VSTR
+ AddrMode == ARMII::AddrModeNone ||
+ AddrMode == ARMII::AddrModeT2_i8 || // Pre/Post inc instructions
+ AddrMode == ARMII::AddrModeT2_i8neg) // Always negative imm
return false;
unsigned NumOps = MI->getDesc().getNumOperands();
@@ -6051,7 +6070,7 @@ bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
NumBits = 8;
Scale = 2;
break;
- case ARMII::AddrModeT2_i8:
+ case ARMII::AddrModeT2_i8pos:
NumBits = 8;
break;
case ARMII::AddrModeT2_i8s4:
@@ -6089,7 +6108,18 @@ bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
}
return false;
+}
+
+void ARMBaseInstrInfo::mergeOutliningCandidateAttributes(
+ Function &F, std::vector<outliner::Candidate> &Candidates) const {
+ outliner::Candidate &C = Candidates.front();
+ // branch-target-enforcement is guaranteed to be consistent between all
+ // candidates, so we only need to look at one.
+ const Function &CFn = C.getMF()->getFunction();
+ if (CFn.hasFnAttribute("branch-target-enforcement"))
+ F.addFnAttr(CFn.getFnAttribute("branch-target-enforcement"));
+ ARMGenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
}
bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom(
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index db9320962e81..5fa912ae35d7 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -349,6 +349,8 @@ public:
bool OutlineFromLinkOnceODRs) const override;
outliner::OutlinedFunction getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override;
+ void mergeOutliningCandidateAttributes(
+ Function &F, std::vector<outliner::Candidate> &Candidates) const override;
outliner::InstrType getOutliningType(MachineBasicBlock::iterator &MIT,
unsigned Flags) const override;
bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
@@ -877,19 +879,23 @@ inline bool isLegalAddressImm(unsigned Opcode, int Imm,
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
switch (AddrMode) {
case ARMII::AddrModeT2_i7:
- return std::abs(Imm) < (((1 << 7) * 1) - 1);
+ return std::abs(Imm) < ((1 << 7) * 1);
case ARMII::AddrModeT2_i7s2:
- return std::abs(Imm) < (((1 << 7) * 2) - 1) && Imm % 2 == 0;
+ return std::abs(Imm) < ((1 << 7) * 2) && Imm % 2 == 0;
case ARMII::AddrModeT2_i7s4:
- return std::abs(Imm) < (((1 << 7) * 4) - 1) && Imm % 4 == 0;
+ return std::abs(Imm) < ((1 << 7) * 4) && Imm % 4 == 0;
case ARMII::AddrModeT2_i8:
- return std::abs(Imm) < (((1 << 8) * 1) - 1);
- case ARMII::AddrMode2:
- return std::abs(Imm) < (((1 << 12) * 1) - 1);
- case ARMII::AddrModeT2_i12:
- return Imm >= 0 && Imm < (((1 << 12) * 1) - 1);
+ return std::abs(Imm) < ((1 << 8) * 1);
+ case ARMII::AddrModeT2_i8pos:
+ return Imm >= 0 && Imm < ((1 << 8) * 1);
+ case ARMII::AddrModeT2_i8neg:
+ return Imm < 0 && -Imm < ((1 << 8) * 1);
case ARMII::AddrModeT2_i8s4:
- return std::abs(Imm) < (((1 << 8) * 4) - 1) && Imm % 4 == 0;
+ return std::abs(Imm) < ((1 << 8) * 4) && Imm % 4 == 0;
+ case ARMII::AddrModeT2_i12:
+ return Imm >= 0 && Imm < ((1 << 12) * 1);
+ case ARMII::AddrMode2:
+ return std::abs(Imm) < ((1 << 12) * 1);
default:
llvm_unreachable("Unhandled Addressing mode");
}
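The retuned bounds in isLegalAddressImm are easiest to sanity-check on the unscaled 8-bit form: the new "< (1 << N) * Scale" bound admits the top of the range, where the previous "- 1" bound stopped one short. A standalone sketch, assuming the usual +/-255 encodable range for AddrModeT2_i8 (isLegalT2i8Imm is an illustrative helper, not part of the patch):

#include <cassert>
#include <cstdlib>

// Unscaled 8-bit offset form (AddrModeT2_i8): accept -255..255.
static bool isLegalT2i8Imm(int Imm) {
  return std::abs(Imm) < (1 << 8);
}

int main() {
  assert(isLegalT2i8Imm(255));   // accepted by the new bound
  assert(isLegalT2i8Imm(-255));
  assert(!isLegalT2i8Imm(256));  // still outside the 8-bit range
  return 0;
}

The scaled forms add the corresponding alignment check (Imm % Scale == 0) on top of the same widened bound.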
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBranchTargets.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBranchTargets.cpp
new file mode 100644
index 000000000000..1091c1f970fa
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBranchTargets.cpp
@@ -0,0 +1,135 @@
+//===-- ARMBranchTargets.cpp -- Harden code using v8.1-M BTI extension -----==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass inserts BTI instructions at the start of every function and basic
+// block which could be indirectly called. The hardware will (when enabled)
+// trap when an indirect branch or call instruction targets an instruction
+// which is not a valid BTI instruction. This is intended to guard against
+// control-flow hijacking attacks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-branch-targets"
+#define ARM_BRANCH_TARGETS_NAME "ARM Branch Targets"
+
+namespace {
+class ARMBranchTargets : public MachineFunctionPass {
+public:
+ static char ID;
+ ARMBranchTargets() : MachineFunctionPass(ID) {}
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ StringRef getPassName() const override { return ARM_BRANCH_TARGETS_NAME; }
+
+private:
+ void addBTI(const ARMInstrInfo &TII, MachineBasicBlock &MBB, bool IsFirstBB);
+};
+} // end anonymous namespace
+
+char ARMBranchTargets::ID = 0;
+
+INITIALIZE_PASS(ARMBranchTargets, "arm-branch-targets", ARM_BRANCH_TARGETS_NAME,
+ false, false)
+
+void ARMBranchTargets::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+FunctionPass *llvm::createARMBranchTargetsPass() {
+ return new ARMBranchTargets();
+}
+
+bool ARMBranchTargets::runOnMachineFunction(MachineFunction &MF) {
+ if (!MF.getInfo<ARMFunctionInfo>()->branchTargetEnforcement())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "********** ARM Branch Targets **********\n"
+ << "********** Function: " << MF.getName() << '\n');
+ const ARMInstrInfo &TII =
+ *static_cast<const ARMInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+ // LLVM does not consider basic blocks which are the targets of jump tables
+ // to be address-taken (the address can't escape anywhere else), but they are
+ // used for indirect branches, so they need BTI instructions.
+ SmallPtrSet<const MachineBasicBlock *, 8> JumpTableTargets;
+ if (const MachineJumpTableInfo *JTI = MF.getJumpTableInfo())
+ for (const MachineJumpTableEntry &JTE : JTI->getJumpTables())
+ for (const MachineBasicBlock *MBB : JTE.MBBs)
+ JumpTableTargets.insert(MBB);
+
+ bool MadeChange = false;
+ for (MachineBasicBlock &MBB : MF) {
+ bool NeedBTI = false;
+ bool IsFirstBB = &MBB == &MF.front();
+
+ // Every function can potentially be called indirectly (even if it has
+ // static linkage, due to linker-generated veneers).
+ if (IsFirstBB)
+ NeedBTI = true;
+
+ // If the block itself is address-taken, or is an exception landing pad, it
+ // could be indirectly branched to.
+ if (MBB.hasAddressTaken() || MBB.isEHPad() || JumpTableTargets.count(&MBB))
+ NeedBTI = true;
+
+ if (NeedBTI) {
+ addBTI(TII, MBB, IsFirstBB);
+ MadeChange = true;
+ }
+ }
+
+ return MadeChange;
+}
+
+/// Insert a BTI/PACBTI instruction into a given basic block \c MBB. If
+/// \c IsFirstBB is true (meaning that this is the first BB in a function) try
+/// to find a PAC instruction and replace it with PACBTI. Otherwise just insert
+/// a BTI instruction.
+/// The point of insertion is at the beginning of the BB, immediately after
+/// meta instructions (such as labels in exception handling landing pads).
+void ARMBranchTargets::addBTI(const ARMInstrInfo &TII, MachineBasicBlock &MBB,
+ bool IsFirstBB) {
+ // Which instruction to insert: BTI or PACBTI
+ unsigned OpCode = ARM::t2BTI;
+
+ // Skip meta instructions, including EH labels
+ auto MBBI = llvm::find_if_not(MBB.instrs(), [](const MachineInstr &MI) {
+ return MI.isMetaInstruction();
+ });
+
+ // If this is the first BB in a function, check whether it starts with a PAC
+ // instruction; if so, remove the PAC so it can be replaced with PACBTI.
+ if (IsFirstBB) {
+ if (MBBI != MBB.instr_end() && MBBI->getOpcode() == ARM::t2PAC) {
+ LLVM_DEBUG(dbgs() << "Removing a 'PAC' instr from BB '" << MBB.getName()
+ << "' to replace with PACBTI\n");
+ OpCode = ARM::t2PACBTI;
+ auto NextMBBI = std::next(MBBI);
+ MBBI->eraseFromParent();
+ MBBI = NextMBBI;
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Inserting a '"
+ << (OpCode == ARM::t2BTI ? "BTI" : "PACBTI")
+ << "' instr into BB '" << MBB.getName() << "'\n");
+ // Finally, insert a new instruction (either BTI or PACBTI)
+ BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII.get(OpCode));
+}
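A compact way to read addBTI() above is as an insertion policy over a block's instruction list: skip leading meta instructions, and in the entry block fold an existing PAC into a single PACBTI. A hedged, standard-C++ sketch of just that policy (Op, Instr and this addBTI are stand-ins, not the LLVM types):

#include <algorithm>
#include <cassert>
#include <vector>

enum class Op { EHLabel, PAC, BTI, PACBTI, Add, Ret };
struct Instr {
  Op Opcode;
  bool IsMeta() const { return Opcode == Op::EHLabel; } // stand-in for isMetaInstruction()
};

static void addBTI(std::vector<Instr> &Block, bool IsFirstBB) {
  Op ToInsert = Op::BTI;
  // Skip meta instructions, e.g. EH labels at the start of a landing pad.
  auto It = std::find_if_not(Block.begin(), Block.end(),
                             [](const Instr &I) { return I.IsMeta(); });
  if (IsFirstBB && It != Block.end() && It->Opcode == Op::PAC) {
    ToInsert = Op::PACBTI;  // Replace the leading PAC with a combined PACBTI.
    It = Block.erase(It);
  }
  Block.insert(It, Instr{ToInsert});
}

int main() {
  std::vector<Instr> Entry{{Op::PAC}, {Op::Add}, {Op::Ret}};
  addBTI(Entry, /*IsFirstBB=*/true);
  assert(Entry.front().Opcode == Op::PACBTI);

  std::vector<Instr> LandingPad{{Op::EHLabel}, {Op::Add}};
  addBTI(LandingPad, /*IsFirstBB=*/false);
  assert(LandingPad[1].Opcode == Op::BTI); // BTI goes after the EH label.
  return 0;
}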
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index 121558276c3e..c2ca4708c208 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -184,6 +184,9 @@ namespace {
/// base address.
DenseMap<int, int> JumpTableUserIndices;
+ // Maps a MachineBasicBlock to the number of jump table entries that refer to it.
+ DenseMap<const MachineBasicBlock *, int> BlockJumpTableRefCount;
+
/// ImmBranch - One per immediate branch, keeping the machine instruction
/// pointer, conditional or unconditional, the max displacement,
/// and (if isCond is true) the corresponding unconditional branch
@@ -274,7 +277,10 @@ namespace {
unsigned &DeadSize, bool &CanDeleteLEA,
bool &BaseRegKill);
bool optimizeThumb2JumpTables();
- MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB,
+ void fixupBTI(unsigned JTI, MachineBasicBlock &OldBB,
+ MachineBasicBlock &NewBB);
+ MachineBasicBlock *adjustJTTargetBlockForward(unsigned JTI,
+ MachineBasicBlock *BB,
MachineBasicBlock *JTBB);
unsigned getUserOffset(CPUser&) const;
@@ -518,6 +524,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
CPEntries.clear();
JumpTableEntryIndices.clear();
JumpTableUserIndices.clear();
+ BlockJumpTableRefCount.clear();
ImmBranches.clear();
PushPopMIs.clear();
T2JumpTables.clear();
@@ -720,6 +727,14 @@ Align ARMConstantIslands::getCPEAlign(const MachineInstr *CPEMI) {
return MCP->getConstants()[CPI].getAlign();
}
+// Exception landing pads, blocks that have their address taken, and function
+// entry blocks will always be (potential) indirect jump targets, regardless of
+// whether or not they are referenced by jump tables.
+static bool isAlwaysIndirectTarget(const MachineBasicBlock &MBB) {
+ return MBB.isEHPad() || MBB.hasAddressTaken() ||
+ &MBB == &MBB.getParent()->front();
+}
+
/// scanFunctionJumpTables - Do a scan of the function, building up
/// information about the sizes of each block and the locations of all
/// the jump tables.
@@ -730,6 +745,20 @@ void ARMConstantIslands::scanFunctionJumpTables() {
(I.getOpcode() == ARM::t2BR_JT || I.getOpcode() == ARM::tBR_JTr))
T2JumpTables.push_back(&I);
}
+
+ if (!MF->getInfo<ARMFunctionInfo>()->branchTargetEnforcement())
+ return;
+
+ if (const MachineJumpTableInfo *JTI = MF->getJumpTableInfo())
+ for (const MachineJumpTableEntry &JTE : JTI->getJumpTables())
+ for (const MachineBasicBlock *MBB : JTE.MBBs) {
+ if (isAlwaysIndirectTarget(*MBB))
+ // Set the reference count essentially to infinity; it will never reach
+ // zero, so the BTI instruction will never be removed.
+ BlockJumpTableRefCount[MBB] = std::numeric_limits<int>::max();
+ else
+ ++BlockJumpTableRefCount[MBB];
+ }
}
/// initializeFunctionInfo - Do the initial scan of the function, building up
@@ -1219,9 +1248,9 @@ int ARMConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset) {
// Point the CPUser node to the replacement
U.CPEMI = CPEs[i].CPEMI;
// Change the CPI in the instruction operand to refer to the clone.
- for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j)
- if (UserMI->getOperand(j).isCPI()) {
- UserMI->getOperand(j).setIndex(CPEs[i].CPI);
+ for (MachineOperand &MO : UserMI->operands())
+ if (MO.isCPI()) {
+ MO.setIndex(CPEs[i].CPI);
break;
}
// Adjust the refcount of the clone...
@@ -1601,9 +1630,9 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex,
BBUtils->adjustBBOffsetsAfter(&*--NewIsland->getIterator());
// Finally, change the CPI in the instruction operand to be ID.
- for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
- if (UserMI->getOperand(i).isCPI()) {
- UserMI->getOperand(i).setIndex(ID);
+ for (MachineOperand &MO : UserMI->operands())
+ if (MO.isCPI()) {
+ MO.setIndex(ID);
break;
}
@@ -2211,8 +2240,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
unsigned JTOffset = BBUtils->getOffsetOf(MI) + 4;
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
BBInfoVector &BBInfo = BBUtils->getBBInfo();
- for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
- MachineBasicBlock *MBB = JTBBs[j];
+ for (MachineBasicBlock *MBB : JTBBs) {
unsigned DstOffset = BBInfo[MBB->getNumber()].Offset;
// Negative offset is not ok. FIXME: We should change BB layout to make
// sure all the branches are forward.
@@ -2405,17 +2433,16 @@ bool ARMConstantIslands::reorderThumb2JumpTables() {
// and try to adjust them such that that's true.
int JTNumber = MI->getParent()->getNumber();
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
- for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
- MachineBasicBlock *MBB = JTBBs[j];
+ for (MachineBasicBlock *MBB : JTBBs) {
int DTNumber = MBB->getNumber();
if (DTNumber < JTNumber) {
// The destination precedes the switch. Try to move the block forward
// so we have a positive offset.
MachineBasicBlock *NewBB =
- adjustJTTargetBlockForward(MBB, MI->getParent());
+ adjustJTTargetBlockForward(JTI, MBB, MI->getParent());
if (NewBB)
- MJTI->ReplaceMBBInJumpTable(JTI, JTBBs[j], NewBB);
+ MJTI->ReplaceMBBInJumpTable(JTI, MBB, NewBB);
MadeChange = true;
}
}
@@ -2424,8 +2451,40 @@ bool ARMConstantIslands::reorderThumb2JumpTables() {
return MadeChange;
}
-MachineBasicBlock *ARMConstantIslands::
-adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
+void ARMConstantIslands::fixupBTI(unsigned JTI, MachineBasicBlock &OldBB,
+ MachineBasicBlock &NewBB) {
+ assert(isThumb2 && "BTI in Thumb1?");
+
+ // Insert a BTI instruction into NewBB
+ BuildMI(NewBB, NewBB.begin(), DebugLoc(), TII->get(ARM::t2BTI));
+
+ // Update jump table reference counts.
+ const MachineJumpTableInfo &MJTI = *MF->getJumpTableInfo();
+ const MachineJumpTableEntry &JTE = MJTI.getJumpTables()[JTI];
+ for (const MachineBasicBlock *MBB : JTE.MBBs) {
+ if (MBB != &OldBB)
+ continue;
+ --BlockJumpTableRefCount[MBB];
+ ++BlockJumpTableRefCount[&NewBB];
+ }
+
+ // If the old basic block reference count dropped to zero, remove
+ // the BTI instruction at its beginning.
+ if (BlockJumpTableRefCount[&OldBB] > 0)
+ return;
+
+ // Skip meta instructions
+ auto BTIPos = llvm::find_if_not(OldBB.instrs(), [](const MachineInstr &MI) {
+ return MI.isMetaInstruction();
+ });
+ assert(BTIPos->getOpcode() == ARM::t2BTI &&
+ "BasicBlock is mentioned in a jump table but does start with BTI");
+ if (BTIPos->getOpcode() == ARM::t2BTI)
+ BTIPos->eraseFromParent();
+}
+
+MachineBasicBlock *ARMConstantIslands::adjustJTTargetBlockForward(
+ unsigned JTI, MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
// If the destination block is terminated by an unconditional branch,
// try to move it; otherwise, create a new block following the jump
// table that branches back to the actual target. This is a very simple
@@ -2483,6 +2542,9 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
NewBB->addSuccessor(BB);
JTBB->replaceSuccessor(BB, NewBB);
+ if (MF->getInfo<ARMFunctionInfo>()->branchTargetEnforcement())
+ fixupBTI(JTI, *BB, *NewBB);
+
++NumJTInserted;
return NewBB;
}
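The BTI bookkeeping added to the constant-island pass boils down to moving jump-table references between blocks and erasing a block's leading BTI only when its count reaches zero. A simplified sketch with plain containers (block IDs and retargetSlots are illustrative, and a single vector of slots stands in for a MachineJumpTableEntry):

#include <cassert>
#include <limits>
#include <map>
#include <vector>

using BlockId = int;

static void retargetSlots(std::map<BlockId, int> &RefCount,
                          std::vector<BlockId> &JumpTable,
                          BlockId OldBB, BlockId NewBB) {
  for (BlockId &Slot : JumpTable) {
    if (Slot != OldBB)
      continue;
    Slot = NewBB;          // the trampoline block now carries the reference
    --RefCount[OldBB];
    ++RefCount[NewBB];
  }
  // RefCount[OldBB] == 0 is the point where its leading BTI would be erased;
  // "always indirect" blocks start at INT_MAX so they never reach zero.
}

int main() {
  std::map<BlockId, int> RefCount{{1, 2}, {7, std::numeric_limits<int>::max()}};
  std::vector<BlockId> JT{1, 1, 7};
  retargetSlots(RefCount, JT, /*OldBB=*/1, /*NewBB=*/9);
  assert(RefCount[1] == 0 && RefCount[9] == 2);
  return 0;
}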
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index a8f09969e948..7a35f252b22a 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -125,9 +125,8 @@ void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
MachineInstrBuilder &UseMI,
MachineInstrBuilder &DefMI) {
const MCInstrDesc &Desc = OldMI.getDesc();
- for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands();
- i != e; ++i) {
- const MachineOperand &MO = OldMI.getOperand(i);
+ for (const MachineOperand &MO :
+ llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) {
assert(MO.isReg() && MO.getReg());
if (MO.isUse())
UseMI.add(MO);
@@ -2252,8 +2251,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
.add(predOps(ARMCC::AL))
.addReg(JumpReg, RegState::Kill);
- for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
- NewCall->addOperand(MI.getOperand(I));
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
+ NewCall->addOperand(MO);
if (MI.isCandidateForCallSiteEntry())
MI.getMF()->moveCallSiteInfo(&MI, NewCall.getInstr());
@@ -2524,17 +2523,21 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::LDRLIT_ga_pcrel:
case ARM::LDRLIT_ga_pcrel_ldr:
case ARM::tLDRLIT_ga_abs:
+ case ARM::t2LDRLIT_ga_pcrel:
case ARM::tLDRLIT_ga_pcrel: {
Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
const MachineOperand &MO1 = MI.getOperand(1);
auto Flags = MO1.getTargetFlags();
const GlobalValue *GV = MO1.getGlobal();
- bool IsARM =
- Opcode != ARM::tLDRLIT_ga_pcrel && Opcode != ARM::tLDRLIT_ga_abs;
+ bool IsARM = Opcode != ARM::tLDRLIT_ga_pcrel &&
+ Opcode != ARM::tLDRLIT_ga_abs &&
+ Opcode != ARM::t2LDRLIT_ga_pcrel;
bool IsPIC =
Opcode != ARM::LDRLIT_ga_abs && Opcode != ARM::tLDRLIT_ga_abs;
unsigned LDRLITOpc = IsARM ? ARM::LDRi12 : ARM::tLDRpci;
+ if (Opcode == ARM::t2LDRLIT_ga_pcrel)
+ LDRLITOpc = ARM::t2LDRpci;
unsigned PICAddOpc =
IsARM
? (Opcode == ARM::LDRLIT_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD)
@@ -3065,7 +3068,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::BL));
}
MIB.cloneMemRefs(MI);
- for (unsigned i = 1; i < MI.getNumOperands(); ++i) MIB.add(MI.getOperand(i));
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
+ MIB.add(MO);
MI.eraseFromParent();
return true;
}
@@ -3080,8 +3084,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
Opcode == ARM::LOADDUAL ? RegState::Define : 0)
.addReg(TRI->getSubReg(PairReg, ARM::gsub_1),
Opcode == ARM::LOADDUAL ? RegState::Define : 0);
- for (unsigned i = 1; i < MI.getNumOperands(); i++)
- MIB.add(MI.getOperand(i));
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
+ MIB.add(MO);
MIB.add(predOps(ARMCC::AL));
MIB.cloneMemRefs(MI);
MI.eraseFromParent();
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 025e43444f9c..b866cf952ff1 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -523,9 +523,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
}
// Determine spill area sizes.
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- int FI = CSI[i].getFrameIdx();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
+ int FI = I.getFrameIdx();
switch (Reg) {
case ARM::R8:
case ARM::R9:
@@ -1317,11 +1317,11 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
// Mark the D-register spill slots as properly aligned. Since MFI computes
// stack slot layout backwards, this can actually mean that the d-reg stack
// slot offsets can be wrong. The offset for d8 will always be correct.
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned DNum = CSI[i].getReg() - ARM::D8;
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned DNum = I.getReg() - ARM::D8;
if (DNum > NumAlignedDPRCS2Regs - 1)
continue;
- int FI = CSI[i].getFrameIdx();
+ int FI = I.getFrameIdx();
// The even-numbered registers will be 16-byte aligned, the odd-numbered
// registers will be 8-byte aligned.
MFI.setObjectAlignment(FI, DNum % 2 ? Align(8) : Align(16));
@@ -1488,9 +1488,9 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
// Find the frame index assigned to d8.
int D8SpillFI = 0;
- for (unsigned i = 0, e = CSI.size(); i != e; ++i)
- if (CSI[i].getReg() == ARM::D8) {
- D8SpillFI = CSI[i].getFrameIdx();
+ for (const CalleeSavedInfo &I : CSI)
+ if (I.getReg() == ARM::D8) {
+ D8SpillFI = I.getFrameIdx();
break;
}
@@ -1693,7 +1693,7 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
// Default 12 bit limit.
break;
case ARMII::AddrMode3:
- case ARMII::AddrModeT2_i8:
+ case ARMII::AddrModeT2_i8neg:
Limit = std::min(Limit, (1U << 8) - 1);
break;
case ARMII::AddrMode5FP16:
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 2b83a292db76..bb2859c766c2 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -3274,7 +3274,8 @@ bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) {
return false;
unsigned int ScalarBits = Type.getScalarSizeInBits();
- bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT;
+ bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT ||
+ N->getOpcode() == ISD::FP_TO_UINT_SAT;
SDNode *Node = N->getOperand(0).getNode();
// floating-point to fixed-point with one fractional bit gets turned into an
@@ -3764,6 +3765,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
break;
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT_SAT:
+ case ISD::FP_TO_SINT_SAT:
if (tryFP_TO_INT(N, dl))
return;
break;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp
index e7e10ce07a44..33d115945614 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1016,6 +1016,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::FP_EXTEND);
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::SELECT_CC);
+ setTargetDAGCombine(ISD::SETCC);
+ }
+ if (Subtarget->hasMVEFloatOps()) {
+ setTargetDAGCombine(ISD::FADD);
}
if (!Subtarget->hasFP64()) {
@@ -10587,10 +10591,9 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
LPadList.reserve(CallSiteNumToLPad.size());
for (unsigned I = 1; I <= MaxCSNum; ++I) {
SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
- for (SmallVectorImpl<MachineBasicBlock*>::iterator
- II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) {
- LPadList.push_back(*II);
- InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end());
+ for (MachineBasicBlock *MBB : MBBList) {
+ LPadList.push_back(MBB);
+ InvokeBBs.insert(MBB->pred_begin(), MBB->pred_end());
}
}
@@ -10879,9 +10882,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
// Add the jump table entries as successors to the MBB.
SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs;
- for (std::vector<MachineBasicBlock*>::iterator
- I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
- MachineBasicBlock *CurMBB = *I;
+ for (MachineBasicBlock *CurMBB : LPadList) {
if (SeenMBBs.insert(CurMBB).second)
DispContBB->addSuccessor(CurMBB);
}
@@ -10943,9 +10944,8 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
// Mark all former landing pads as non-landing pads. The dispatch is the only
// landing pad now.
- for (SmallVectorImpl<MachineBasicBlock*>::iterator
- I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I)
- (*I)->setIsEHPad(false);
+ for (MachineBasicBlock *MBBLPad : MBBLPads)
+ MBBLPad->setIsEHPad(false);
// The instruction is gone now.
MI.eraseFromParent();
@@ -11771,8 +11771,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
}
MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
- for (unsigned i = 0; i < MI.getNumOperands(); ++i)
- MIB.add(MI.getOperand(i));
+ for (const MachineOperand &MO : MI.operands())
+ MIB.add(MO);
MI.eraseFromParent();
return BB;
}
@@ -13083,6 +13083,65 @@ static SDValue PerformVSELECTCombine(SDNode *N,
return DCI.DAG.getNode(ISD::VSELECT, SDLoc(N), Type, Cond, RHS, LHS);
}
+// Convert vsetcc([0,1,2,..], splat(n), ult) -> vctp n
+static SDValue PerformVSetCCToVCTPCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ EVT VT = N->getValueType(0);
+
+ if (!Subtarget->hasMVEIntegerOps() ||
+ !DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ if (CC == ISD::SETUGE) {
+ std::swap(Op0, Op1);
+ CC = ISD::SETULT;
+ }
+
+ if (CC != ISD::SETULT || VT.getScalarSizeInBits() != 1 ||
+ Op0.getOpcode() != ISD::BUILD_VECTOR)
+ return SDValue();
+
+ // Check first operand is BuildVector of 0,1,2,...
+ for (unsigned I = 0; I < VT.getVectorNumElements(); I++) {
+ if (!Op0.getOperand(I).isUndef() &&
+ !(isa<ConstantSDNode>(Op0.getOperand(I)) &&
+ Op0.getConstantOperandVal(I) == I))
+ return SDValue();
+ }
+
+ // The second operand must be a splat; Op1S is the splatted scalar.
+ SDValue Op1S = DCI.DAG.getSplatValue(Op1);
+ if (!Op1S)
+ return SDValue();
+
+ unsigned Opc;
+ switch (VT.getVectorNumElements()) {
+ case 2:
+ Opc = Intrinsic::arm_mve_vctp64;
+ break;
+ case 4:
+ Opc = Intrinsic::arm_mve_vctp32;
+ break;
+ case 8:
+ Opc = Intrinsic::arm_mve_vctp16;
+ break;
+ case 16:
+ Opc = Intrinsic::arm_mve_vctp8;
+ break;
+ default:
+ return SDValue();
+ }
+
+ SDLoc DL(N);
+ return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DCI.DAG.getConstant(Opc, DL, MVT::i32),
+ DCI.DAG.getZExtOrTrunc(Op1S, DL, MVT::i32));
+}
+
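The setcc-to-VCTP combine above rests on the observation that comparing the step vector [0,1,2,...] against splat(n) with ULT yields exactly the "first n lanes active" predicate that VCTP materialises. A trivial scalar sketch of that mask for a 4-lane vector (tailMask4 is an illustrative helper):

#include <cassert>
#include <cstdint>

static uint8_t tailMask4(uint32_t Remaining) {
  uint8_t Mask = 0;
  for (uint32_t Lane = 0; Lane < 4; ++Lane)
    if (Lane < Remaining)           // lane of [0,1,2,3] ult splat(Remaining)
      Mask |= 1u << Lane;
  return Mask;
}

int main() {
  assert(tailMask4(0) == 0b0000);
  assert(tailMask4(3) == 0b0111);
  assert(tailMask4(9) == 0b1111);   // counts past the vector width saturate
  return 0;
}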
static SDValue PerformABSCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
@@ -13427,6 +13486,26 @@ bool ARMTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
return VT.isScalarInteger();
}
+bool ARMTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
+ EVT VT) const {
+ if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
+ return false;
+
+ switch (FPVT.getSimpleVT().SimpleTy) {
+ case MVT::f16:
+ return Subtarget->hasVFP2Base();
+ case MVT::f32:
+ return Subtarget->hasVFP2Base();
+ case MVT::f64:
+ return Subtarget->hasFP64();
+ case MVT::v4f32:
+ case MVT::v8f16:
+ return Subtarget->hasMVEFloatOps();
+ default:
+ return false;
+ }
+}
+
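shouldConvertFpToSat gates the new FP_TO_*_SAT lowering on the available FP units; the payoff is that out-of-range and NaN inputs are clamped rather than left undefined, which is what the hardware conversions do. A scalar sketch of the saturating semantics being assumed here, following the LangRef description of llvm.fptosi.sat (fptosi_sat_i32 is an illustrative helper):

#include <cassert>
#include <cmath>
#include <cstdint>
#include <limits>

static int32_t fptosi_sat_i32(float X) {
  if (std::isnan(X))
    return 0;                                   // NaN maps to zero
  if (X <= static_cast<float>(std::numeric_limits<int32_t>::min()))
    return std::numeric_limits<int32_t>::min(); // clamp below
  if (X >= static_cast<float>(std::numeric_limits<int32_t>::max()))
    return std::numeric_limits<int32_t>::max(); // clamp above
  return static_cast<int32_t>(X);               // otherwise truncate toward zero
}

int main() {
  assert(fptosi_sat_i32(1.9f) == 1);
  assert(fptosi_sat_i32(-3.0e9f) == std::numeric_limits<int32_t>::min());
  assert(fptosi_sat_i32(std::nanf("")) == 0);
  return 0;
}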
static SDValue PerformSHLSimplify(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *ST) {
@@ -14485,6 +14564,52 @@ static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+// Check that N is CMPZ(CSINC(0, 0, CC, X)), return X if valid.
+static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) {
+ if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1)))
+ return SDValue();
+ SDValue CSInc = Cmp->getOperand(0);
+ if (CSInc.getOpcode() != ARMISD::CSINC ||
+ !isNullConstant(CSInc.getOperand(0)) ||
+ !isNullConstant(CSInc.getOperand(1)) || !CSInc->hasOneUse())
+ return SDValue();
+ CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2);
+ return CSInc.getOperand(3);
+}
+
+static SDValue PerformCMPZCombine(SDNode *N, SelectionDAG &DAG) {
+ // Given CMPZ(CSINC(C, 0, 0, EQ), 0), we can just use C directly. As in
+ // t92: glue = ARMISD::CMPZ t74, 0
+ // t93: i32 = ARMISD::CSINC 0, 0, 1, t92
+ // t96: glue = ARMISD::CMPZ t93, 0
+ // t114: i32 = ARMISD::CSINV 0, 0, 0, t96
+ ARMCC::CondCodes Cond;
+ if (SDValue C = IsCMPZCSINC(N, Cond))
+ if (Cond == ARMCC::EQ)
+ return C;
+ return SDValue();
+}
+
+static SDValue PerformCSETCombine(SDNode *N, SelectionDAG &DAG) {
+ // Fold away an unnecessary CMPZ/CSINC
+ // CSXYZ A, B, C1 (CMPZ (CSINC 0, 0, C2, D), 0) ->
+ // if C1==EQ -> CSXYZ A, B, C2, D
+ // if C1==NE -> CSXYZ A, B, NOT(C2), D
+ ARMCC::CondCodes Cond;
+ if (SDValue C = IsCMPZCSINC(N->getOperand(3).getNode(), Cond)) {
+ if (N->getConstantOperandVal(2) == ARMCC::EQ)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
+ N->getOperand(1),
+ DAG.getConstant(Cond, SDLoc(N), MVT::i32), C);
+ if (N->getConstantOperandVal(2) == ARMCC::NE)
+ return DAG.getNode(
+ N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
+ N->getOperand(1),
+ DAG.getConstant(ARMCC::getOppositeCondition(Cond), SDLoc(N), MVT::i32), C);
+ }
+ return SDValue();
+}
+
/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
/// ARMISD::VMOVRRD.
static SDValue PerformVMOVRRDCombine(SDNode *N,
@@ -16411,6 +16536,42 @@ static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
return FixConv;
}
+static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ if (!Subtarget->hasMVEFloatOps())
+ return SDValue();
+
+ // Turn (fadd x, (vselect c, y, -0.0)) into (vselect c, (fadd x, y), x)
+ // The second form can be more easily turned into a predicated vadd, and
+ // possibly combined into a fma to become a predicated vfma.
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // The identity element for fadd is -0.0, which these VMOVs represent.
+ auto isNegativeZeroSplat = [&](SDValue Op) {
+ if (Op.getOpcode() != ISD::BITCAST ||
+ Op.getOperand(0).getOpcode() != ARMISD::VMOVIMM)
+ return false;
+ if (VT == MVT::v4f32 && Op.getOperand(0).getConstantOperandVal(0) == 1664)
+ return true;
+ if (VT == MVT::v8f16 && Op.getOperand(0).getConstantOperandVal(0) == 2688)
+ return true;
+ return false;
+ };
+
+ if (Op0.getOpcode() == ISD::VSELECT && Op1.getOpcode() != ISD::VSELECT)
+ std::swap(Op0, Op1);
+
+ if (Op1.getOpcode() != ISD::VSELECT ||
+ !isNegativeZeroSplat(Op1.getOperand(2)))
+ return SDValue();
+ SDValue FAdd =
+ DAG.getNode(ISD::FADD, DL, VT, Op0, Op1.getOperand(1), N->getFlags());
+ return DAG.getNode(ISD::VSELECT, DL, VT, Op1.getOperand(0), FAdd, Op0);
+}
+
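The fadd/vselect rewrite above is sound because -0.0 is the additive identity for floating point. A scalar, standard-C++ check of the equivalence (before/after are illustrative stand-ins for the two DAG shapes, evaluated per lane):

#include <cassert>

// fadd x, (vselect c, y, -0.0)
static float before(bool C, float X, float Y) { return X + (C ? Y : -0.0f); }
// vselect c, (fadd x, y), x
static float after(bool C, float X, float Y) { return C ? X + Y : X; }

int main() {
  for (bool C : {false, true})
    for (float X : {0.0f, 1.5f, -2.25f})
      for (float Y : {3.0f, -0.5f})
        assert(before(C, X, Y) == after(C, X, Y));
  return 0;
}

The second form exposes the select on the whole add, which is what lets it become a predicated vadd (and later, possibly, a predicated vfma).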
/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
/// can replace combinations of VCVT (integer to floating-point) and VDIV
/// when the VDIV has a constant operand that is a power of 2.
@@ -17049,18 +17210,6 @@ static SDValue PerformShiftCombine(SDNode *N,
const ARMSubtarget *ST) {
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
- if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) {
- // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high
- // 16-bits of x is zero. This optimizes rev + lsr 16 to rev16.
- SDValue N1 = N->getOperand(1);
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
- SDValue N0 = N->getOperand(0);
- if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP &&
- DAG.MaskedValueIsZero(N0.getOperand(0),
- APInt::getHighBitsSet(32, 16)))
- return DAG.getNode(ISD::ROTR, SDLoc(N), VT, N0, N1);
- }
- }
if (ST->isThumb1Only() && N->getOpcode() == ISD::SHL && VT == MVT::i32 &&
N->getOperand(0)->getOpcode() == ISD::AND &&
@@ -18173,6 +18322,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SELECT_CC:
case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget);
case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget);
+ case ISD::SETCC: return PerformVSetCCToVCTPCombine(N, DCI, Subtarget);
case ISD::ABS: return PerformABSCombine(N, DCI, Subtarget);
case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
@@ -18205,6 +18355,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
return PerformVCVTCombine(N, DCI.DAG, Subtarget);
+ case ISD::FADD:
+ return PerformFAddVSelectCombine(N, DCI.DAG, Subtarget);
case ISD::FDIV:
return PerformVDIVCombine(N, DCI.DAG, Subtarget);
case ISD::INTRINSIC_WO_CHAIN:
@@ -18228,6 +18380,12 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
return PerformCMOVCombine(N, DCI.DAG);
case ARMISD::BRCOND:
return PerformBRCONDCombine(N, DCI.DAG);
+ case ARMISD::CMPZ:
+ return PerformCMPZCombine(N, DCI.DAG);
+ case ARMISD::CSINC:
+ case ARMISD::CSINV:
+ case ARMISD::CSNEG:
+ return PerformCSETCombine(N, DCI.DAG);
case ISD::LOAD:
return PerformLOADCombine(N, DCI, Subtarget);
case ARMISD::VLD1DUP:
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h
index 0fddd58e178e..e3b422358cae 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -736,6 +736,8 @@ class VectorType;
bool preferIncOfAddToSubOfNot(EVT VT) const override;
+ bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
+
protected:
std::pair<const TargetRegisterClass *, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI,
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrFormats.td
index de351372abf2..ff5afd787c82 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -103,15 +103,17 @@ def AddrModeT1_4 : AddrMode<9>;
def AddrModeT1_s : AddrMode<10>;
def AddrModeT2_i12 : AddrMode<11>;
def AddrModeT2_i8 : AddrMode<12>;
-def AddrModeT2_so : AddrMode<13>;
-def AddrModeT2_pc : AddrMode<14>;
-def AddrModeT2_i8s4 : AddrMode<15>;
-def AddrMode_i12 : AddrMode<16>;
-def AddrMode5FP16 : AddrMode<17>;
-def AddrModeT2_ldrex : AddrMode<18>;
-def AddrModeT2_i7s4 : AddrMode<19>;
-def AddrModeT2_i7s2 : AddrMode<20>;
-def AddrModeT2_i7 : AddrMode<21>;
+def AddrModeT2_i8pos : AddrMode<13>;
+def AddrModeT2_i8neg : AddrMode<14>;
+def AddrModeT2_so : AddrMode<15>;
+def AddrModeT2_pc : AddrMode<16>;
+def AddrModeT2_i8s4 : AddrMode<17>;
+def AddrMode_i12 : AddrMode<18>;
+def AddrMode5FP16 : AddrMode<19>;
+def AddrModeT2_ldrex : AddrMode<20>;
+def AddrModeT2_i7s4 : AddrMode<21>;
+def AddrModeT2_i7s2 : AddrMode<22>;
+def AddrModeT2_i7 : AddrMode<23>;
// Load / store index mode.
class IndexMode<bits<2> val> {
@@ -1392,9 +1394,12 @@ class T2I<dag oops, dag iops, InstrItinClass itin,
class T2Ii12<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: Thumb2I<oops, iops, AddrModeT2_i12, 4, itin, opc, asm, "",pattern>;
-class T2Ii8<dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : Thumb2I<oops, iops, AddrModeT2_i8, 4, itin, opc, asm, "", pattern>;
+class T2Ii8p<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_i8pos, 4, itin, opc, asm, "", pattern>;
+class T2Ii8n<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_i8neg, 4, itin, opc, asm, "", pattern>;
class T2Iso<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: Thumb2I<oops, iops, AddrModeT2_so, 4, itin, opc, asm, "", pattern>;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.td
index 7d0bc756e882..1c1db473f866 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -420,6 +420,12 @@ def lo16AllZero : PatLeaf<(i32 imm), [{
return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0;
}], hi16>;
+// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
+def top16Zero: PatLeaf<(i32 GPR:$src), [{
+ return !SDValue(N,0)->getValueType(0).isVector() &&
+ CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
+ }]>;
+
class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>;
@@ -4748,6 +4754,8 @@ def : ARMV6Pat<(srl (bswap (extloadi16 addrmode3:$addr)), (i32 16)),
(REV16 (LDRH addrmode3:$addr))>;
def : ARMV6Pat<(truncstorei16 (srl (bswap GPR:$Rn), (i32 16)), addrmode3:$addr),
(STRH (REV16 GPR:$Rn), addrmode3:$addr)>;
+def : ARMV6Pat<(srl (bswap top16Zero:$Rn), (i32 16)),
+ (REV16 GPR:$Rn)>;
let AddedComplexity = 5 in
def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
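The new top16Zero REV16 pattern (and its Thumb1/Thumb2 twins further down) replaces the DAG combine removed from PerformShiftCombine later in this patch. The underlying identity is easy to verify in plain C++; bswap32 and rev16 here are illustrative reimplementations, not LLVM code:

#include <cassert>
#include <cstdint>

static uint32_t bswap32(uint32_t X) {
  return (X >> 24) | ((X >> 8) & 0xFF00u) | ((X << 8) & 0xFF0000u) | (X << 24);
}
// Byte-swap within each 16-bit halfword, as REV16 does.
static uint32_t rev16(uint32_t X) {
  return ((X & 0x00FF00FFu) << 8) | ((X >> 8) & 0x00FF00FFu);
}

int main() {
  for (uint32_t X : {0x0000ABCDu, 0x00000042u, 0x0000FFFFu}) {
    assert((X >> 16) == 0);                 // the top16Zero precondition
    assert(bswap32(X) >> 16 == rev16(X));   // so srl(bswap x, 16) == rev16(x)
  }
  return 0;
}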
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrMVE.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrMVE.td
index 697730037277..f53814a80e01 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -3621,21 +3621,24 @@ class MVE_VMUL_fp<string iname, string suffix, bits<2> size, list<dag> pattern=[
let validForTailPredication = 1;
}
-multiclass MVE_VMULT_fp_m<string iname, MVEVectorVTInfo VTI,
- SDNode Op, Intrinsic PredInt> {
+multiclass MVE_VMULT_fp_m<string iname, MVEVectorVTInfo VTI, SDNode Op,
+ Intrinsic PredInt, SDPatternOperator IdentityVec> {
def "" : MVE_VMUL_fp<iname, VTI.Suffix, VTI.Size>;
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEFloat] in {
- defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME)>;
+ defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME), IdentityVec>;
}
}
-multiclass MVE_VMUL_fp_m<MVEVectorVTInfo VTI>
- : MVE_VMULT_fp_m<"vmul", VTI, fmul, int_arm_mve_mul_predicated>;
+multiclass MVE_VMUL_fp_m<MVEVectorVTInfo VTI, SDPatternOperator IdentityVec>
+ : MVE_VMULT_fp_m<"vmul", VTI, fmul, int_arm_mve_mul_predicated, IdentityVec>;
+
+def ARMimmOneF: PatLeaf<(bitconvert (v4f32 (ARMvmovFPImm (i32 112))))>; // 1.0 float
+def ARMimmOneH: PatLeaf<(bitconvert (v8i16 (ARMvmovImm (i32 2620))))>; // 1.0 half
-defm MVE_VMULf32 : MVE_VMUL_fp_m<MVE_v4f32>;
-defm MVE_VMULf16 : MVE_VMUL_fp_m<MVE_v8f16>;
+defm MVE_VMULf32 : MVE_VMUL_fp_m<MVE_v4f32, ARMimmOneF>;
+defm MVE_VMULf16 : MVE_VMUL_fp_m<MVE_v8f16, ARMimmOneH>;
class MVE_VCMLA<string suffix, bits<2> size>
: MVEFloatArithNeon<"vcmla", suffix, size{1}, (outs MQPR:$Qd),
@@ -3747,27 +3750,30 @@ defm MVE_VFMSf32 : MVE_VFMA_fp_multi<"vfms", 1, MVE_v4f32>;
defm MVE_VFMSf16 : MVE_VFMA_fp_multi<"vfms", 1, MVE_v8f16>;
multiclass MVE_VADDSUB_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
- SDNode Op, Intrinsic PredInt> {
+ SDNode Op, Intrinsic PredInt, SDPatternOperator IdentityVec> {
def "" : MVE_VADDSUBFMA_fp<iname, VTI.Suffix, VTI.Size, 0, 1, bit_21> {
let validForTailPredication = 1;
}
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEFloat] in {
- defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME)>;
+ defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME), IdentityVec>;
}
}
-multiclass MVE_VADD_fp_m<MVEVectorVTInfo VTI>
- : MVE_VADDSUB_fp_m<"vadd", 0, VTI, fadd, int_arm_mve_add_predicated>;
-multiclass MVE_VSUB_fp_m<MVEVectorVTInfo VTI>
- : MVE_VADDSUB_fp_m<"vsub", 1, VTI, fsub, int_arm_mve_sub_predicated>;
+multiclass MVE_VADD_fp_m<MVEVectorVTInfo VTI, SDPatternOperator IdentityVec>
+ : MVE_VADDSUB_fp_m<"vadd", 0, VTI, fadd, int_arm_mve_add_predicated, IdentityVec>;
+multiclass MVE_VSUB_fp_m<MVEVectorVTInfo VTI, SDPatternOperator IdentityVec>
+ : MVE_VADDSUB_fp_m<"vsub", 1, VTI, fsub, int_arm_mve_sub_predicated, IdentityVec>;
-defm MVE_VADDf32 : MVE_VADD_fp_m<MVE_v4f32>;
-defm MVE_VADDf16 : MVE_VADD_fp_m<MVE_v8f16>;
+def ARMimmMinusZeroF: PatLeaf<(bitconvert (v4i32 (ARMvmovImm (i32 1664))))>; // -0.0 float
+def ARMimmMinusZeroH: PatLeaf<(bitconvert (v8i16 (ARMvmovImm (i32 2688))))>; // -0.0 half
-defm MVE_VSUBf32 : MVE_VSUB_fp_m<MVE_v4f32>;
-defm MVE_VSUBf16 : MVE_VSUB_fp_m<MVE_v8f16>;
+defm MVE_VADDf32 : MVE_VADD_fp_m<MVE_v4f32, ARMimmMinusZeroF>;
+defm MVE_VADDf16 : MVE_VADD_fp_m<MVE_v8f16, ARMimmMinusZeroH>;
+
+defm MVE_VSUBf32 : MVE_VSUB_fp_m<MVE_v4f32, ARMimmAllZerosV>;
+defm MVE_VSUBf16 : MVE_VSUB_fp_m<MVE_v8f16, ARMimmAllZerosV>;
class MVE_VCADD<string suffix, bits<2> size, string cstr="">
: MVEFloatArithNeon<"vcadd", suffix, size{1}, (outs MQPR:$Qd),
@@ -5373,22 +5379,22 @@ defm MVE_VHSUB_qr_u16 : MVE_VHSUB_qr_m<MVE_v8u16>;
defm MVE_VHSUB_qr_u32 : MVE_VHSUB_qr_m<MVE_v4u32>;
multiclass MVE_VADDSUB_qr_f<string iname, MVEVectorVTInfo VTI, bit subtract,
- SDNode Op, Intrinsic PredInt> {
+ SDNode Op, Intrinsic PredInt, SDPatternOperator IdentityVec> {
def "" : MVE_VxADDSUB_qr<iname, VTI.Suffix, VTI.Size{0}, 0b11, subtract, VTI.Size>;
defm : MVE_TwoOpPatternDup<VTI, Op, PredInt, (? ),
- !cast<Instruction>(NAME)>;
+ !cast<Instruction>(NAME), IdentityVec>;
}
let Predicates = [HasMVEFloat] in {
defm MVE_VADD_qr_f32 : MVE_VADDSUB_qr_f<"vadd", MVE_v4f32, 0b0, fadd,
- int_arm_mve_add_predicated>;
+ int_arm_mve_add_predicated, ARMimmMinusZeroF>;
defm MVE_VADD_qr_f16 : MVE_VADDSUB_qr_f<"vadd", MVE_v8f16, 0b0, fadd,
- int_arm_mve_add_predicated>;
+ int_arm_mve_add_predicated, ARMimmMinusZeroH>;
defm MVE_VSUB_qr_f32 : MVE_VADDSUB_qr_f<"vsub", MVE_v4f32, 0b1, fsub,
- int_arm_mve_sub_predicated>;
+ int_arm_mve_sub_predicated, ARMimmAllZerosV>;
defm MVE_VSUB_qr_f16 : MVE_VADDSUB_qr_f<"vsub", MVE_v8f16, 0b1, fsub,
- int_arm_mve_sub_predicated>;
+ int_arm_mve_sub_predicated, ARMimmAllZerosV>;
}
class MVE_VxSHL_qr<string iname, string suffix, bit U, bits<2> size,
@@ -5567,16 +5573,16 @@ defm MVE_VQRDMULH_qr_s8 : MVE_VQRDMULH_qr_m<MVE_v16s8>;
defm MVE_VQRDMULH_qr_s16 : MVE_VQRDMULH_qr_m<MVE_v8s16>;
defm MVE_VQRDMULH_qr_s32 : MVE_VQRDMULH_qr_m<MVE_v4s32>;
-multiclass MVE_VxxMUL_qr_f_m<MVEVectorVTInfo VTI> {
+multiclass MVE_VxxMUL_qr_f_m<MVEVectorVTInfo VTI, SDPatternOperator IdentityVec> {
let validForTailPredication = 1 in
def "" : MVE_VxxMUL_qr<"vmul", VTI.Suffix, VTI.Size{0}, 0b11, VTI.Size>;
defm : MVE_TwoOpPatternDup<VTI, fmul, int_arm_mve_mul_predicated, (? ),
- !cast<Instruction>(NAME)>;
+ !cast<Instruction>(NAME), IdentityVec>;
}
let Predicates = [HasMVEFloat] in {
- defm MVE_VMUL_qr_f16 : MVE_VxxMUL_qr_f_m<MVE_v8f16>;
- defm MVE_VMUL_qr_f32 : MVE_VxxMUL_qr_f_m<MVE_v4f32>;
+ defm MVE_VMUL_qr_f16 : MVE_VxxMUL_qr_f_m<MVE_v8f16, ARMimmOneH>;
+ defm MVE_VMUL_qr_f32 : MVE_VxxMUL_qr_f_m<MVE_v4f32, ARMimmOneF>;
}
class MVE_VFMAMLA_qr<string iname, string suffix,
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb.td
index bf717a4056e9..f09ad8167600 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -1576,6 +1576,8 @@ def : T1Pat<(srl (bswap (extloadi16 t_addrmode_is2:$addr)), (i32 16)),
(tREV16 (tLDRHi t_addrmode_is2:$addr))>;
def : T1Pat<(srl (bswap (extloadi16 t_addrmode_rr:$addr)), (i32 16)),
(tREV16 (tLDRHr t_addrmode_rr:$addr))>;
+def : T1Pat<(srl (bswap top16Zero:$Rn), (i32 16)),
+ (tREV16 tGPR:$Rn)>;
def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)),
t_addrmode_is2:$addr),
(tSTRHi(tREV16 tGPR:$Rn), t_addrmode_is2:$addr)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 783db9dde17f..4471317f4ea4 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -1191,9 +1191,9 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
let DecoderMethod = "DecodeT2LoadImm12";
}
- def i8 : T2Ii8 <(outs target:$Rt), (ins t2addrmode_negimm8:$addr), iii,
- opc, "\t$Rt, $addr",
- [(set target:$Rt, (opnode t2addrmode_negimm8:$addr))]>,
+ def i8 : T2Ii8n <(outs target:$Rt), (ins t2addrmode_negimm8:$addr), iii,
+ opc, "\t$Rt, $addr",
+ [(set target:$Rt, (opnode t2addrmode_negimm8:$addr))]>,
Sched<[WriteLd]> {
bits<4> Rt;
bits<13> addr;
@@ -1284,9 +1284,9 @@ multiclass T2I_st<bits<2> opcod, string opc,
let Inst{23} = addr{12}; // U
let Inst{11-0} = addr{11-0}; // imm
}
- def i8 : T2Ii8 <(outs), (ins target:$Rt, t2addrmode_negimm8:$addr), iii,
- opc, "\t$Rt, $addr",
- [(opnode target:$Rt, t2addrmode_negimm8:$addr)]>,
+ def i8 : T2Ii8n <(outs), (ins target:$Rt, t2addrmode_negimm8:$addr), iii,
+ opc, "\t$Rt, $addr",
+ [(opnode target:$Rt, t2addrmode_negimm8:$addr)]>,
Sched<[WriteST]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0000;
@@ -1580,8 +1580,8 @@ def t2LDR_POST_imm : t2AsmPseudo<"ldr${p}.w $Rt, $Rn, $imm",
// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110).
// Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4
class T2IldT<bit signed, bits<2> type, string opc, InstrItinClass ii>
- : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_posimm8:$addr), ii, opc,
- "\t$Rt, $addr", []>, Sched<[WriteLd]> {
+ : T2Ii8p<(outs rGPR:$Rt), (ins t2addrmode_posimm8:$addr), ii, opc,
+ "\t$Rt, $addr", []>, Sched<[WriteLd]> {
bits<4> Rt;
bits<13> addr;
let Inst{31-27} = 0b11111;
@@ -1747,8 +1747,8 @@ def t2STR_POST_imm : t2AsmPseudo<"str${p}.w $Rt, $Rn, $imm",
// only.
// Ref: A8.6.193 STR (immediate, Thumb) Encoding T4
class T2IstT<bits<2> type, string opc, InstrItinClass ii>
- : T2Ii8<(outs), (ins rGPR:$Rt, t2addrmode_imm8:$addr), ii, opc,
- "\t$Rt, $addr", []>, Sched<[WriteST]> {
+ : T2Ii8p<(outs), (ins rGPR:$Rt, t2addrmode_posimm8:$addr), ii, opc,
+ "\t$Rt, $addr", []>, Sched<[WriteST]> {
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
let Inst{24} = 0; // not signed
@@ -1851,8 +1851,8 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> {
let DecoderMethod = "DecodeT2LoadImm12";
}
- def i8 : T2Ii8<(outs), (ins t2addrmode_negimm8:$addr), IIC_Preload, opc,
- "\t$addr",
+ def i8 : T2Ii8n<(outs), (ins t2addrmode_negimm8:$addr), IIC_Preload, opc,
+ "\t$addr",
[(ARMPreload t2addrmode_negimm8:$addr, (i32 write), (i32 instr))]>,
Sched<[WritePreLd]> {
let Inst{31-25} = 0b1111100;
@@ -2926,18 +2926,11 @@ let AddedComplexity = 1 in
def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm),
(t2BICri rGPR:$src, t2_so_imm_not:$imm)>;
-// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
-def top16Zero: PatLeaf<(i32 rGPR:$src), [{
- return !SDValue(N,0)->getValueType(0).isVector() &&
- CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
- }]>;
-
// so_imm_notSext is needed instead of so_imm_not, as the value of imm
// will match the extended, not the original bitWidth for $src.
def : T2Pat<(and top16Zero:$src, t2_so_imm_notSext:$imm),
(t2BICri rGPR:$src, t2_so_imm_notSext:$imm)>;
-
// FIXME: Disable this pattern on Darwin to workaround an assembler bug.
def : T2Pat<(or rGPR:$src, t2_so_imm_not:$imm),
(t2ORNri rGPR:$src, t2_so_imm_not:$imm)>,
@@ -3283,6 +3276,9 @@ def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
[(set rGPR:$Rd, (rotr (bswap rGPR:$Rm), (i32 16)))]>,
Sched<[WriteALU]>;
+def : T2Pat<(srl (bswap top16Zero:$Rn), (i32 16)),
+ (t2REV16 rGPR:$Rn)>;
+
def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
"revsh", ".w\t$Rd, $Rm",
[(set rGPR:$Rd, (sra (bswap rGPR:$Rm), (i32 16)))]>,
@@ -4059,6 +4055,8 @@ def t2HINT : T2I<(outs), (ins imm0_239:$imm), NoItinerary, "hint", ".w\t$imm",
bits<8> imm;
let Inst{31-3} = 0b11110011101011111000000000000;
let Inst{7-0} = imm;
+
+ let DecoderMethod = "DecodeT2HintSpaceInstruction";
}
def : t2InstAlias<"hint$p $imm", (t2HINT imm0_239:$imm, pred:$p), 0>;
@@ -4079,6 +4077,11 @@ def : t2InstAlias<"esb$p", (t2HINT 16, pred:$p), 0> {
def : t2InstAlias<"csdb$p.w", (t2HINT 20, pred:$p), 0>;
def : t2InstAlias<"csdb$p", (t2HINT 20, pred:$p), 1>;
+def : t2InstAlias<"pacbti$p r12,lr,sp", (t2HINT 13, pred:$p), 1>;
+def : t2InstAlias<"bti$p", (t2HINT 15, pred:$p), 1>;
+def : t2InstAlias<"pac$p r12,lr,sp", (t2HINT 29, pred:$p), 1>;
+def : t2InstAlias<"aut$p r12,lr,sp", (t2HINT 45, pred:$p), 1>;
+
def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt",
[(int_arm_dbg imm0_15:$opt)]> {
bits<4> opt;
@@ -4254,6 +4257,19 @@ def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2MOVi32imm tglobaladdr :$dst)>,
def : T2Pat<(ARMWrapperJT tjumptable:$dst), (t2LEApcrelJT tjumptable:$dst)>;
+let hasNoSchedulingInfo = 1 in {
+def t2LDRLIT_ga_pcrel : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr),
+ IIC_iLoadiALU,
+ [(set rGPR:$dst,
+ (ARMWrapperPIC tglobaladdr:$addr))]>,
+ Requires<[IsThumb, HasV8MBaseline, DontUseMovtInPic]>;
+}
+
+// TLS globals
+def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr),
+ (t2LDRLIT_ga_pcrel tglobaltlsaddr:$addr)>,
+ Requires<[IsThumb, HasV8MBaseline, DontUseMovtInPic]>;
+
// Pseudo instruction that combines ldr from constpool and add pc. This should
// be expanded into two instructions late to allow if-conversion and
// scheduling.
@@ -5607,6 +5623,15 @@ let Predicates = [HasV8_1MMainline] in {
defm : CSPats<ARMcsinv, t2CSINV>;
defm : CSPats<ARMcsneg, t2CSNEG>;
+ def : T2Pat<(ARMcmov (i32 1), (i32 0), cmovpred:$imm),
+ (t2CSINC ZR, ZR, imm0_31:$imm)>;
+ def : T2Pat<(ARMcmov (i32 -1), (i32 0), cmovpred:$imm),
+ (t2CSINV ZR, ZR, imm0_31:$imm)>;
+ def : T2Pat<(ARMcmov (i32 0), (i32 1), cmovpred:$imm),
+ (t2CSINC ZR, ZR, (inv_cond_XFORM imm:$imm))>;
+ def : T2Pat<(ARMcmov (i32 0), (i32 -1), cmovpred:$imm),
+ (t2CSINV ZR, ZR, (inv_cond_XFORM imm:$imm))>;
+
multiclass ModifiedV8_1CSEL<Instruction Insn, dag modvalue> {
def : T2Pat<(ARMcmov modvalue, GPRwithZR:$tval, cmovpred:$imm),
(Insn GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm)>;
@@ -5636,3 +5661,78 @@ let Predicates = [HasV8_1MMainline] in {
def : InstAlias<"cneg\t$Rd, $Rn, $fcond",
(t2CSNEG rGPR:$Rd, GPRwithZRnosp:$Rn, GPRwithZRnosp:$Rn, pred_noal_inv:$fcond)>;
}
+
+
+// PACBTI
+let Predicates = [IsThumb2, HasV8_1MMainline, HasPACBTI] in {
+def t2PACG : V8_1MI<(outs rGPR:$Rd),
+ (ins pred:$p, GPRnopc:$Rn, GPRnopc:$Rm),
+ AddrModeNone, NoItinerary, "pacg${p}", "$Rd, $Rn, $Rm", "", []> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{31-20} = 0b111110110110;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b1111;
+ let Inst{11-8} = Rd;
+ let Inst{7-4} = 0b0000;
+ let Inst{3-0} = Rm;
+}
+
+let hasSideEffects = 1 in {
+class PACBTIAut<dag iops, string asm, bit b>
+ : V8_1MI<(outs), iops,
+ AddrModeNone, NoItinerary, asm, "$Ra, $Rn, $Rm", "", []> {
+ bits<4> Ra;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{31-20} = 0b111110110101;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Ra;
+ let Inst{11-5} = 0b1111000;
+ let Inst{4} = b;
+ let Inst{3-0} = Rm;
+}
+}
+
+def t2AUTG : PACBTIAut<(ins pred:$p, GPRnosp:$Ra, GPRnopc:$Rn, GPRnopc:$Rm),
+ "autg${p}", 0>;
+
+let isBranch = 1, isTerminator = 1, isIndirectBranch = 1 in {
+ def t2BXAUT : PACBTIAut<(ins pred:$p, GPRnosp:$Ra, rGPR:$Rn, GPRnopc:$Rm),
+ "bxaut${p}", 1>;
+}
+}
+
+
+class PACBTIHintSpaceInst<string asm, string ops, bits<8> imm>
+ : V8_1MI<(outs), (ins), AddrModeNone, NoItinerary, asm, ops, "", []> {
+ let Inst{31-8} = 0b111100111010111110000000;
+ let Inst{7-0} = imm;
+
+ let Unpredictable{19-16} = 0b1111;
+ let Unpredictable{13-11} = 0b101;
+
+ let DecoderMethod = "DecodeT2HintSpaceInstruction";
+}
+
+class PACBTIHintSpaceNoOpsInst<string asm, bits<8> imm>
+ : PACBTIHintSpaceInst<asm, "", imm>;
+
+class PACBTIHintSpaceDefInst<string asm, bits<8> imm>
+ : PACBTIHintSpaceInst<asm, "r12, lr, sp", imm> {
+ let Defs = [R12];
+ let Uses = [LR, SP];
+}
+
+class PACBTIHintSpaceUseInst<string asm, bits<8> imm>
+ : PACBTIHintSpaceInst<asm, "r12, lr, sp", imm> {
+ let Uses = [R12, LR, SP];
+}
+
+def t2PAC : PACBTIHintSpaceDefInst<"pac", 0b00011101>;
+def t2PACBTI : PACBTIHintSpaceDefInst<"pacbti", 0b00001101>;
+def t2BTI : PACBTIHintSpaceNoOpsInst<"bti", 0b00001111>;
+def t2AUT : PACBTIHintSpaceUseInst<"aut", 0b00101101> {
+ let hasSideEffects = 1;
+}
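The PACBTI hint-space instructions defined above are distinguished only by their 8-bit hint immediate (pacbti = 13, bti = 15, pac = 29, aut = 45), matching the t2InstAlias entries earlier in this file and the disassembler change later in this diff. A small standalone sketch of that mapping (illustrative helper name, not taken from the patch):

#include <cstdint>
#include <cstdio>
#include <string>

static std::string pacBtiHintName(uint8_t Imm) {
  switch (Imm) {
  case 0x0D: return "pacbti r12, lr, sp"; // t2PACBTI, hint #13
  case 0x0F: return "bti";                // t2BTI,    hint #15
  case 0x1D: return "pac r12, lr, sp";    // t2PAC,    hint #29
  case 0x2D: return "aut r12, lr, sp";    // t2AUT,    hint #45
  default:   return "hint #" + std::to_string(Imm); // plain t2HINT otherwise
  }
}

int main() {
  std::printf("%s\n", pacBtiHintName(0x1D).c_str()); // prints: pac r12, lr, sp
  return 0;
}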
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 6e259b1baf97..3b10c60a0654 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -1298,8 +1298,8 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
// Can't use an updating ld/st if the base register is also a dest
// register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
- for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
- if (MI->getOperand(i).getReg() == Base)
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
+ if (MO.getReg() == Base)
return false;
int Bytes = getLSMultipleTransferSize(MI);
@@ -1326,8 +1326,8 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
return false;
bool HighRegsUsed = false;
- for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
- if (MI->getOperand(i).getReg() >= ARM::R8) {
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
+ if (MO.getReg() >= ARM::R8) {
HighRegsUsed = true;
break;
}
@@ -1350,8 +1350,8 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
.addImm(Pred).addReg(PredReg);
// Transfer the rest of operands.
- for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
- MIB.add(MI->getOperand(OpNum));
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 3))
+ MIB.add(MO);
// Transfer memoperands.
MIB.setMemRefs(MI->memoperands());
@@ -2119,9 +2119,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
isThumb1 = AFI->isThumbFunction() && !isThumb2;
bool Modified = false;
- for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
- ++MFI) {
- MachineBasicBlock &MBB = *MFI;
+ for (MachineBasicBlock &MBB : Fn) {
Modified |= LoadStoreMultipleOpti(MBB);
if (STI->hasV5TOps())
Modified |= MergeReturnIntoLDM(MBB);
@@ -2710,13 +2708,13 @@ static bool isLegalOrConvertableAddressImm(unsigned Opcode, int Imm,
if (isLegalAddressImm(Opcode, Imm, TII))
return true;
- // We can convert AddrModeT2_i12 to AddrModeT2_i8.
+ // We can convert AddrModeT2_i12 to AddrModeT2_i8neg.
const MCInstrDesc &Desc = TII->get(Opcode);
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
switch (AddrMode) {
case ARMII::AddrModeT2_i12:
CodesizeEstimate += 1;
- return std::abs(Imm) < (((1 << 8) * 1) - 1);
+ return Imm < 0 && -Imm < ((1 << 8) * 1);
}
return false;
}
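The rewritten loops in this file use llvm::drop_begin, the STLExtras range adaptor that skips the first N elements of a range, instead of manual operand indexing. A minimal usage sketch, assuming an LLVM development tree to build against:

#include "llvm/ADT/STLExtras.h"
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> Operands = {0, 1, 2, 3, 4};
  // Equivalent to: for (unsigned i = 2, e = Operands.size(); i != e; ++i)
  for (int Op : llvm::drop_begin(Operands, 2))
    std::printf("%d ", Op); // prints: 2 3 4
  return 0;
}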
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
index 507c3e69b3a4..308d5e7889f2 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
@@ -13,8 +13,63 @@ using namespace llvm;
void ARMFunctionInfo::anchor() {}
+static bool GetBranchTargetEnforcement(MachineFunction &MF) {
+ const auto &Subtarget = MF.getSubtarget<ARMSubtarget>();
+ if (!Subtarget.isMClass() || !Subtarget.hasV7Ops())
+ return false;
+
+ const Function &F = MF.getFunction();
+ if (!F.hasFnAttribute("branch-target-enforcement")) {
+ if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
+ F.getParent()->getModuleFlag("branch-target-enforcement")))
+ return BTE->getZExtValue();
+ return false;
+ }
+
+ const StringRef BTIEnable =
+ F.getFnAttribute("branch-target-enforcement").getValueAsString();
+ assert(BTIEnable.equals_insensitive("true") ||
+ BTIEnable.equals_insensitive("false"));
+ return BTIEnable.equals_insensitive("true");
+}
+
+// The returned pair holds the values for the ARMFunctionInfo members
+// SignReturnAddress and SignReturnAddressAll, respectively.
+static std::pair<bool, bool> GetSignReturnAddress(const Function &F) {
+ if (!F.hasFnAttribute("sign-return-address")) {
+ const Module &M = *F.getParent();
+ if (const auto *Sign = mdconst::extract_or_null<ConstantInt>(
+ M.getModuleFlag("sign-return-address"))) {
+ if (Sign->getZExtValue()) {
+ if (const auto *All = mdconst::extract_or_null<ConstantInt>(
+ M.getModuleFlag("sign-return-address-all")))
+ return {true, All->getZExtValue()};
+ return {true, false};
+ }
+ }
+ return {false, false};
+ }
+
+ StringRef Scope = F.getFnAttribute("sign-return-address").getValueAsString();
+ if (Scope.equals("none"))
+ return {false, false};
+
+ if (Scope.equals("all"))
+ return {true, true};
+
+ assert(Scope.equals("non-leaf"));
+ return {true, false};
+}
+
ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF)
: isThumb(MF.getSubtarget<ARMSubtarget>().isThumb()),
hasThumb2(MF.getSubtarget<ARMSubtarget>().hasThumb2()),
IsCmseNSEntry(MF.getFunction().hasFnAttribute("cmse_nonsecure_entry")),
- IsCmseNSCall(MF.getFunction().hasFnAttribute("cmse_nonsecure_call")) {}
+ IsCmseNSCall(MF.getFunction().hasFnAttribute("cmse_nonsecure_call")),
+ BranchTargetEnforcement(GetBranchTargetEnforcement(MF)) {
+
+ const auto &Subtarget = MF.getSubtarget<ARMSubtarget>();
+ if (Subtarget.isMClass() && Subtarget.hasV7Ops())
+ std::tie(SignReturnAddress, SignReturnAddressAll) =
+ GetSignReturnAddress(MF.getFunction());
+}
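GetSignReturnAddress above reduces the "sign-return-address" function attribute (falling back to module flags) to the pair {SignReturnAddress, SignReturnAddressAll}. A plain-C++ sketch of the scope-string part of that mapping (illustrative helper name, no LLVM dependencies):

#include <cassert>
#include <string>
#include <utility>

static std::pair<bool, bool> signReturnAddressFromScope(const std::string &Scope) {
  if (Scope == "none")
    return {false, false}; // never sign the return address
  if (Scope == "all")
    return {true, true};   // sign in every function
  assert(Scope == "non-leaf");
  return {true, false};    // sign only where LR is spilled
}

int main() {
  assert(signReturnAddressFromScope("non-leaf") == std::make_pair(true, false));
  assert(signReturnAddressFromScope("all") == std::make_pair(true, true));
  return 0;
}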
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
index 851655284060..4077fc058217 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -142,6 +142,17 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// con/destructors).
bool PreservesR0 = false;
+ /// True if the function should sign its return address.
+ bool SignReturnAddress = false;
+
+  /// True if the function should sign its return address, even if LR is not
+ /// saved.
+ bool SignReturnAddressAll = false;
+
+ /// True if BTI instructions should be placed at potential indirect jump
+ /// destinations.
+ bool BranchTargetEnforcement = false;
+
public:
ARMFunctionInfo() = default;
@@ -268,6 +279,20 @@ public:
void setPreservesR0() { PreservesR0 = true; }
bool getPreservesR0() const { return PreservesR0; }
+
+ bool shouldSignReturnAddress() const {
+ return shouldSignReturnAddress(LRSpilled);
+ }
+
+ bool shouldSignReturnAddress(bool SpillsLR) const {
+ if (!SignReturnAddress)
+ return false;
+ if (SignReturnAddressAll)
+ return true;
+    return SpillsLR;
+ }
+
+ bool branchTargetEnforcement() const { return BranchTargetEnforcement; }
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMPredicates.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMPredicates.td
index 2dc097566d14..c0dc6a363471 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMPredicates.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMPredicates.td
@@ -107,6 +107,8 @@ def HasRAS : Predicate<"Subtarget->hasRAS()">,
AssemblerPredicate<(all_of FeatureRAS), "ras">;
def HasLOB : Predicate<"Subtarget->hasLOB()">,
AssemblerPredicate<(all_of FeatureLOB), "lob">;
+def HasPACBTI : Predicate<"Subtarget->hasPACBTI()">,
+ AssemblerPredicate<(all_of FeaturePACBTI), "pacbti">;
def HasFP16 : Predicate<"Subtarget->hasFP16()">,
AssemblerPredicate<(all_of FeatureFP16),"half-float conversions">;
def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterInfo.td
index 9752b3166b45..760a5a5a20cf 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMRegisterInfo.td
@@ -277,6 +277,16 @@ def GPRwithAPSR : RegisterClass<"ARM", [i32], 32, (add (sub GPR, PC), APSR_NZCV)
let DiagnosticString = "operand must be a register in range [r0, r14] or apsr_nzcv";
}
+// GPRs without the SP register. Used for BXAUT and AUTG
+def GPRnosp : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12), LR, PC)> {
+ let AltOrders = [(add LR, GPRnosp), (trunc GPRnosp, 8),
+ (add (trunc GPRnosp, 8), R12, LR, (shl GPRnosp, 8))];
+ let AltOrderSelect = [{
+ return MF.getSubtarget<ARMSubtarget>().getGPRAllocationOrder(MF);
+ }];
+ let DiagnosticString = "operand must be a register in range [r0, r12] or LR or PC";
+}
+
// GPRs without the PC and SP registers but with APSR. Used by CLRM instruction.
def GPRwithAPSRnosp : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12), LR, APSR)> {
let isAllocatable = 0;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.h
index 5e1217b6a468..d51a888c951f 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -373,6 +373,8 @@ protected:
/// HasLOB - if true, the processor supports the Low Overhead Branch extension
bool HasLOB = false;
+ bool HasPACBTI = false;
+
/// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are
/// particularly effective at zeroing a VFP register.
bool HasZeroCycleZeroing = false;
@@ -671,6 +673,7 @@ public:
bool hasCRC() const { return HasCRC; }
bool hasRAS() const { return HasRAS; }
bool hasLOB() const { return HasLOB; }
+ bool hasPACBTI() const { return HasPACBTI; }
bool hasVirtualization() const { return HasVirtualization; }
bool useNEONForSinglePrecisionFP() const {
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSystemRegister.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSystemRegister.td
index f21c7f0246f9..c03db15d1041 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSystemRegister.td
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSystemRegister.td
@@ -106,6 +106,24 @@ def : MClassSysReg<0, 0, 1, 0x894, "control_ns">;
def : MClassSysReg<0, 0, 1, 0x898, "sp_ns">;
}
+let Requires = [{ {ARM::FeaturePACBTI} }] in {
+def : MClassSysReg<0, 0, 1, 0x820, "pac_key_p_0">;
+def : MClassSysReg<0, 0, 1, 0x821, "pac_key_p_1">;
+def : MClassSysReg<0, 0, 1, 0x822, "pac_key_p_2">;
+def : MClassSysReg<0, 0, 1, 0x823, "pac_key_p_3">;
+def : MClassSysReg<0, 0, 1, 0x824, "pac_key_u_0">;
+def : MClassSysReg<0, 0, 1, 0x825, "pac_key_u_1">;
+def : MClassSysReg<0, 0, 1, 0x826, "pac_key_u_2">;
+def : MClassSysReg<0, 0, 1, 0x827, "pac_key_u_3">;
+def : MClassSysReg<0, 0, 1, 0x8a0, "pac_key_p_0_ns">;
+def : MClassSysReg<0, 0, 1, 0x8a1, "pac_key_p_1_ns">;
+def : MClassSysReg<0, 0, 1, 0x8a2, "pac_key_p_2_ns">;
+def : MClassSysReg<0, 0, 1, 0x8a3, "pac_key_p_3_ns">;
+def : MClassSysReg<0, 0, 1, 0x8a4, "pac_key_u_0_ns">;
+def : MClassSysReg<0, 0, 1, 0x8a5, "pac_key_u_1_ns">;
+def : MClassSysReg<0, 0, 1, 0x8a6, "pac_key_u_2_ns">;
+def : MClassSysReg<0, 0, 1, 0x8a7, "pac_key_u_3_ns">;
+}
// Banked Registers
//
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 833c7effd31c..0b314ac2a41e 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -92,6 +92,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMTarget() {
initializeARMLoadStoreOptPass(Registry);
initializeARMPreAllocLoadStoreOptPass(Registry);
initializeARMParallelDSPPass(Registry);
+ initializeARMBranchTargetsPass(Registry);
initializeARMConstantIslandsPass(Registry);
initializeARMExecutionDomainFixPass(Registry);
initializeARMExpandPseudoPass(Registry);
@@ -571,6 +572,7 @@ void ARMPassConfig::addPreEmitPass() {
}
void ARMPassConfig::addPreEmitPass2() {
+ addPass(createARMBranchTargetsPass());
addPass(createARMConstantIslandPass());
addPass(createARMLowOverheadLoopsPass());
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 88de84a4fd78..602c6745d310 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -334,8 +334,9 @@ InstructionCost ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
}
// Checks whether Inst is part of a min(max()) or max(min()) pattern
-// that will match to an SSAT instruction
-static bool isSSATMinMaxPattern(Instruction *Inst, const APInt &Imm) {
+// that will match to an SSAT instruction. Returns the instruction being
+// saturated, or null if no saturation pattern was found.
+static Value *isSSATMinMaxPattern(Instruction *Inst, const APInt &Imm) {
Value *LHS, *RHS;
ConstantInt *C;
SelectPatternFlavor InstSPF = matchSelectPattern(Inst, LHS, RHS).Flavor;
@@ -358,12 +359,27 @@ static bool isSSATMinMaxPattern(Instruction *Inst, const APInt &Imm) {
return false;
};
- if (isSSatMin(Inst->getOperand(1)) ||
- (Inst->hasNUses(2) && (isSSatMin(*Inst->user_begin()) ||
- isSSatMin(*(++Inst->user_begin())))))
- return true;
+ if (isSSatMin(Inst->getOperand(1)))
+ return cast<Instruction>(Inst->getOperand(1))->getOperand(1);
+ if (Inst->hasNUses(2) &&
+ (isSSatMin(*Inst->user_begin()) || isSSatMin(*(++Inst->user_begin()))))
+ return Inst->getOperand(1);
}
- return false;
+ return nullptr;
+}
+
+// Look for a FP Saturation pattern, where the instruction can be simplified to
+// a fptosi.sat. max(min(fptosi)). The constant in this case is always free.
+static bool isFPSatMinMaxPattern(Instruction *Inst, const APInt &Imm) {
+ if (Imm.getBitWidth() != 64 ||
+ Imm != APInt::getHighBitsSet(64, 33)) // -2147483648
+ return false;
+ Value *FP = isSSATMinMaxPattern(Inst, Imm);
+ if (!FP && isa<ICmpInst>(Inst) && Inst->hasOneUse())
+ FP = isSSATMinMaxPattern(cast<Instruction>(*Inst->user_begin()), Imm);
+ if (!FP)
+ return false;
+ return isa<FPToSIInst>(FP);
}
InstructionCost ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
@@ -423,6 +439,9 @@ InstructionCost ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
return 0;
}
+ if (Inst && ST->hasVFP2Base() && isFPSatMinMaxPattern(Inst, Imm))
+ return 0;
+
// We can convert <= -1 to < 0, which is generally quite cheap.
if (Inst && Opcode == Instruction::ICmp && Idx == 1 && Imm.isAllOnesValue()) {
ICmpInst::Predicate Pred = cast<ICmpInst>(Inst)->getPredicate();
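isFPSatMinMaxPattern above only fires when the 64-bit immediate equals APInt::getHighBitsSet(64, 33), which the comment annotates as -2147483648. A quick standalone check of that equivalence (plain C++, no LLVM headers):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t HighBits33 = ~uint64_t(0) << (64 - 33);    // bits 63..31 set
  assert(HighBits33 == 0xFFFFFFFF80000000ULL);
  assert(int64_t(HighBits33) == int64_t(INT32_MIN));  // i.e. -2147483648 as i64
  return 0;
}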
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 64d2e1bfa9b2..39f407ba7149 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -6429,15 +6429,17 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic == "hvc" ||
Mnemonic.startswith("vsel") || Mnemonic == "vins" || Mnemonic == "vmovx" ||
Mnemonic == "bxns" || Mnemonic == "blxns" ||
- Mnemonic == "vdot" || Mnemonic == "vmmla" ||
+ Mnemonic == "vdot" || Mnemonic == "vmmla" ||
Mnemonic == "vudot" || Mnemonic == "vsdot" ||
Mnemonic == "vcmla" || Mnemonic == "vcadd" ||
Mnemonic == "vfmal" || Mnemonic == "vfmsl" ||
- Mnemonic == "wls" || Mnemonic == "le" || Mnemonic == "dls" ||
- Mnemonic == "csel" || Mnemonic == "csinc" ||
+ Mnemonic == "wls" || Mnemonic == "le" || Mnemonic == "dls" ||
+ Mnemonic == "csel" || Mnemonic == "csinc" ||
Mnemonic == "csinv" || Mnemonic == "csneg" || Mnemonic == "cinc" ||
- Mnemonic == "cinv" || Mnemonic == "cneg" || Mnemonic == "cset" ||
- Mnemonic == "csetm")
+ Mnemonic == "cinv" || Mnemonic == "cneg" || Mnemonic == "cset" ||
+ Mnemonic == "csetm" ||
+ Mnemonic == "aut" || Mnemonic == "pac" || Mnemonic == "pacbti" ||
+ Mnemonic == "bti")
return Mnemonic;
// First, split out any predication code. Ignore mnemonics we know aren't
@@ -6581,9 +6583,11 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic,
Mnemonic == "csinc" || Mnemonic == "csinv" || Mnemonic == "csneg" ||
Mnemonic == "cinc" || Mnemonic == "cinv" || Mnemonic == "cneg" ||
Mnemonic == "cset" || Mnemonic == "csetm" ||
- Mnemonic.startswith("vpt") || Mnemonic.startswith("vpst") ||
(hasCDE() && MS.isCDEInstr(Mnemonic) &&
!MS.isITPredicableCDEInstr(Mnemonic)) ||
+ Mnemonic.startswith("vpt") || Mnemonic.startswith("vpst") ||
+ Mnemonic == "pac" || Mnemonic == "pacbti" || Mnemonic == "aut" ||
+ Mnemonic == "bti" ||
(hasMVE() &&
(Mnemonic.startswith("vst2") || Mnemonic.startswith("vld2") ||
Mnemonic.startswith("vst4") || Mnemonic.startswith("vld4") ||
@@ -12272,6 +12276,7 @@ bool ARMAsmParser::enableArchExtFeature(StringRef Name, SMLoc &ExtLoc) {
{ARM::FeatureFPARMv8, ARM::FeatureFullFP16}},
{ARM::AEK_RAS, {Feature_HasV8Bit}, {ARM::FeatureRAS}},
{ARM::AEK_LOB, {Feature_HasV8_1MMainlineBit}, {ARM::FeatureLOB}},
+ {ARM::AEK_PACBTI, {Feature_HasV8_1MMainlineBit}, {ARM::FeaturePACBTI}},
// FIXME: Unsupported extensions.
{ARM::AEK_OS, {}, {}},
{ARM::AEK_IWMMXT, {}, {}},
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 9caef9f09ea9..c3df7dc88d79 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -185,8 +185,11 @@ static DecodeStatus DecodetGPREvenRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus
DecodeGPRwithAPSR_NZCVnospRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst,
- unsigned RegNo, uint64_t Address,
+static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeGPRnospRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
const void *Decoder);
static DecodeStatus DecodeGPRwithAPSRRegisterClass(MCInst &Inst,
unsigned RegNo, uint64_t Address,
@@ -287,6 +290,9 @@ static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2HintSpaceInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val,
@@ -1172,6 +1178,19 @@ DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo,
return S;
}
+static DecodeStatus DecodeGPRnospRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ if (RegNo == 13)
+ S = MCDisassembler::SoftFail;
+
+ Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder));
+
+ return S;
+}
+
static DecodeStatus
DecodeGPRwithAPSRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
@@ -2441,6 +2460,31 @@ static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
return S;
}
+static DecodeStatus DecodeT2HintSpaceInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned imm = fieldFromInstruction(Insn, 0, 8);
+
+ unsigned Opcode = ARM::t2HINT;
+
+ if (imm == 0x0D) {
+ Opcode = ARM::t2PACBTI;
+ } else if (imm == 0x1D) {
+ Opcode = ARM::t2PAC;
+ } else if (imm == 0x2D) {
+ Opcode = ARM::t2AUT;
+ } else if (imm == 0x0F) {
+ Opcode = ARM::t2BTI;
+ }
+
+ Inst.setOpcode(Opcode);
+ if (Opcode == ARM::t2HINT) {
+ Inst.addOperand(MCOperand::createImm(imm));
+ }
+
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -4726,6 +4770,25 @@ static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val,
if (!(FeatureBits[ARM::Feature8MSecExt]))
return MCDisassembler::Fail;
break;
+ case 0x20: // pac_key_p_0
+ case 0x21: // pac_key_p_1
+ case 0x22: // pac_key_p_2
+ case 0x23: // pac_key_p_3
+ case 0x24: // pac_key_u_0
+ case 0x25: // pac_key_u_1
+ case 0x26: // pac_key_u_2
+ case 0x27: // pac_key_u_3
+ case 0xa0: // pac_key_p_0_ns
+ case 0xa1: // pac_key_p_1_ns
+ case 0xa2: // pac_key_p_2_ns
+ case 0xa3: // pac_key_p_3_ns
+ case 0xa4: // pac_key_u_0_ns
+ case 0xa5: // pac_key_u_1_ns
+ case 0xa6: // pac_key_u_2_ns
+ case 0xa7: // pac_key_u_3_ns
+ if (!(FeatureBits[ARM::FeaturePACBTI]))
+ return MCDisassembler::Fail;
+ break;
default:
// Architecturally defined as unpredictable
S = MCDisassembler::SoftFail;
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index 43f7575df6db..f8de0320166a 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -195,16 +195,18 @@ namespace ARMII {
AddrModeT1_4 = 9,
AddrModeT1_s = 10, // i8 * 4 for pc and sp relative data
AddrModeT2_i12 = 11,
- AddrModeT2_i8 = 12,
- AddrModeT2_so = 13,
- AddrModeT2_pc = 14, // +/- i12 for pc relative data
- AddrModeT2_i8s4 = 15, // i8 * 4
- AddrMode_i12 = 16,
- AddrMode5FP16 = 17, // i8 * 2
- AddrModeT2_ldrex = 18, // i8 * 4, with unscaled offset in MCInst
- AddrModeT2_i7s4 = 19, // i7 * 4
- AddrModeT2_i7s2 = 20, // i7 * 2
- AddrModeT2_i7 = 21, // i7 * 1
+ AddrModeT2_i8 = 12, // +/- i8
+ AddrModeT2_i8pos = 13, // + i8
+ AddrModeT2_i8neg = 14, // - i8
+ AddrModeT2_so = 15,
+ AddrModeT2_pc = 16, // +/- i12 for pc relative data
+ AddrModeT2_i8s4 = 17, // i8 * 4
+ AddrMode_i12 = 18,
+ AddrMode5FP16 = 19, // i8 * 2
+ AddrModeT2_ldrex = 20, // i8 * 4, with unscaled offset in MCInst
+ AddrModeT2_i7s4 = 21, // i7 * 4
+ AddrModeT2_i7s2 = 22, // i7 * 2
+ AddrModeT2_i7 = 23, // i7 * 1
};
inline static const char *AddrModeToString(AddrMode addrmode) {
@@ -223,6 +225,8 @@ namespace ARMII {
case AddrModeT1_s: return "AddrModeT1_s";
case AddrModeT2_i12: return "AddrModeT2_i12";
case AddrModeT2_i8: return "AddrModeT2_i8";
+ case AddrModeT2_i8pos: return "AddrModeT2_i8pos";
+ case AddrModeT2_i8neg: return "AddrModeT2_i8neg";
case AddrModeT2_so: return "AddrModeT2_so";
case AddrModeT2_pc: return "AddrModeT2_pc";
case AddrModeT2_i8s4: return "AddrModeT2_i8s4";
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index 3e4c97630af6..02a2d01176fc 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -299,4 +299,9 @@ void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
else if (STI.hasFeature(ARM::FeatureVirtualization))
emitAttribute(ARMBuildAttrs::Virtualization_use,
ARMBuildAttrs::AllowVirtualization);
+
+ if (STI.hasFeature(ARM::FeaturePACBTI)) {
+ emitAttribute(ARMBuildAttrs::PAC_extension, ARMBuildAttrs::AllowPAC);
+ emitAttribute(ARMBuildAttrs::BTI_extension, ARMBuildAttrs::AllowBTI);
+ }
}
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index e4e95f63f0a6..224c61b9f065 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -205,9 +205,9 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
return;
}
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- int FI = CSI[i].getFrameIdx();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
+ int FI = I.getFrameIdx();
switch (Reg) {
case ARM::R8:
case ARM::R9:
@@ -266,10 +266,9 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
}
- for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
- E = CSI.end(); I != E; ++I) {
- unsigned Reg = I->getReg();
- int FI = I->getFrameIdx();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
+ int FI = I.getFrameIdx();
switch (Reg) {
case ARM::R8:
case ARM::R9:
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index bdb167a08e61..ebd139af2219 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -261,7 +261,7 @@ void Thumb2InstrInfo::expandLoadStackGuard(
cast<GlobalValue>((*MI->memoperands_begin())->getValue());
if (MF.getSubtarget<ARMSubtarget>().isGVInGOT(GV))
- expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_pcrel, ARM::t2LDRi12);
+ expandLoadStackGuardBase(MI, ARM::t2LDRLIT_ga_pcrel, ARM::t2LDRi12);
else if (MF.getTarget().isPositionIndependent())
expandLoadStackGuardBase(MI, ARM::t2MOV_ga_pcrel, ARM::t2LDRi12);
else
@@ -634,7 +634,8 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned NumBits = 0;
unsigned Scale = 1;
- if (AddrMode == ARMII::AddrModeT2_i8 || AddrMode == ARMII::AddrModeT2_i12) {
+ if (AddrMode == ARMII::AddrModeT2_i8neg ||
+ AddrMode == ARMII::AddrModeT2_i12) {
// i8 supports only negative, and i12 supports only positive, so
// based on Offset sign convert Opcode to the appropriate
// instruction
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index 132516694f4e..1164b6ebbac3 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -502,8 +502,8 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
// For the non-writeback version (this one), the base register must be
// one of the registers being loaded.
bool isOK = false;
- for (unsigned i = 3; i < MI->getNumOperands(); ++i) {
- if (MI->getOperand(i).getReg() == BaseReg) {
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 3)) {
+ if (MO.getReg() == BaseReg) {
isOK = true;
break;
}
@@ -527,8 +527,8 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
// numbered register (i.e. it's in operand 4 onwards) then with writeback
// the stored value is unknown, so we can't convert to tSTMIA_UPD.
Register BaseReg = MI->getOperand(0).getReg();
- for (unsigned i = 4; i < MI->getNumOperands(); ++i)
- if (MI->getOperand(i).getReg() == BaseReg)
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 4))
+ if (MO.getReg() == BaseReg)
return false;
break;
@@ -611,8 +611,8 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
}
// Transfer the rest of operands.
- for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum)
- MIB.add(MI->getOperand(OpNum));
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), OpNum))
+ MIB.add(MO);
// Transfer memoperands.
MIB.setMemRefs(MI->memoperands());
diff --git a/contrib/llvm-project/llvm/lib/Target/BPF/BPFMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/BPF/BPFMCInstLower.cpp
index 846798a63cb7..2ce9c386f24c 100644
--- a/contrib/llvm-project/llvm/lib/Target/BPF/BPFMCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/BPF/BPFMCInstLower.cpp
@@ -47,9 +47,7 @@ MCOperand BPFMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
void BPFMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
-
+ for (const MachineOperand &MO : MI->operands()) {
MCOperand MCOp;
switch (MO.getType()) {
default:
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/BitTracker.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/BitTracker.cpp
index 8bced3cec082..685bafd785df 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/BitTracker.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/BitTracker.cpp
@@ -214,9 +214,9 @@ bool BT::RegisterCell::meet(const RegisterCell &RC, Register SelfR) {
BT::RegisterCell &BT::RegisterCell::insert(const BT::RegisterCell &RC,
const BitMask &M) {
uint16_t B = M.first(), E = M.last(), W = width();
- // Sanity: M must be a valid mask for *this.
+ // M must be a valid mask for *this.
assert(B < W && E < W);
- // Sanity: the masked part of *this must have the same number of bits
+ // The masked part of *this must have the same number of bits
// as the source.
assert(B > E || E-B+1 == RC.width()); // B <= E => E-B+1 = |RC|.
assert(B <= E || E+(W-B)+1 == RC.width()); // E < B => E+(W-B)+1 = |RC|.
@@ -850,8 +850,7 @@ void BT::visitNonBranch(const MachineInstr &MI) {
bool Eval = ME.evaluate(MI, Map, ResMap);
if (Trace && Eval) {
- for (unsigned i = 0, n = MI.getNumOperands(); i < n; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isUse())
continue;
RegisterRef RU(MO);
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
index 0f6dedeb28c3..1938a5c259da 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
@@ -189,7 +189,7 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
unsigned NumDefs = 0;
- // Sanity verification: there should not be any defs with subregisters.
+ // Basic correctness check: there should not be any defs with subregisters.
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
index 43f0758f6598..8c3b9572201e 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
@@ -476,10 +476,10 @@ namespace {
} // end anonymous namespace
static const NodeSet *node_class(GepNode *N, NodeSymRel &Rel) {
- for (NodeSymRel::iterator I = Rel.begin(), E = Rel.end(); I != E; ++I)
- if (I->count(N))
- return &*I;
- return nullptr;
+ for (const NodeSet &S : Rel)
+ if (S.count(N))
+ return &S;
+ return nullptr;
}
// Create an ordered pair of GepNode pointers. The pair will be used in
@@ -589,9 +589,8 @@ void HexagonCommonGEP::common() {
dbgs() << "{ " << I->first << ", " << I->second << " }\n";
dbgs() << "Gep equivalence classes:\n";
- for (NodeSymRel::iterator I = EqRel.begin(), E = EqRel.end(); I != E; ++I) {
+ for (const NodeSet &S : EqRel) {
dbgs() << '{';
- const NodeSet &S = *I;
for (NodeSet::const_iterator J = S.begin(), F = S.end(); J != F; ++J) {
if (J != S.begin())
dbgs() << ',';
@@ -604,8 +603,7 @@ void HexagonCommonGEP::common() {
// Create a projection from a NodeSet to the minimal element in it.
using ProjMap = std::map<const NodeSet *, GepNode *>;
ProjMap PM;
- for (NodeSymRel::iterator I = EqRel.begin(), E = EqRel.end(); I != E; ++I) {
- const NodeSet &S = *I;
+ for (const NodeSet &S : EqRel) {
GepNode *Min = *std::min_element(S.begin(), S.end(), NodeOrder);
std::pair<ProjMap::iterator,bool> Ins = PM.insert(std::make_pair(&S, Min));
(void)Ins;
@@ -1280,8 +1278,8 @@ bool HexagonCommonGEP::runOnFunction(Function &F) {
return false;
// For now bail out on C++ exception handling.
- for (Function::iterator A = F.begin(), Z = F.end(); A != Z; ++A)
- for (BasicBlock::iterator I = A->begin(), E = A->end(); I != E; ++I)
+ for (const BasicBlock &BB : F)
+ for (const Instruction &I : BB)
if (isa<InvokeInst>(I) || isa<LandingPadInst>(I))
return false;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
index a774baaa48e6..d3fcdb6ae9a8 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
@@ -1254,7 +1254,7 @@ void HCE::collect(MachineFunction &MF) {
void HCE::assignInits(const ExtRoot &ER, unsigned Begin, unsigned End,
AssignmentMap &IMap) {
- // Sanity check: make sure that all extenders in the range [Begin..End)
+ // Basic correctness: make sure that all extenders in the range [Begin..End)
// share the same root ER.
for (unsigned I = Begin; I != End; ++I)
assert(ER == ExtRoot(Extenders[I].getOp()));
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
index 23d0cc829e52..03b0f75b2dc1 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@@ -237,12 +237,9 @@ static bool isEvenReg(unsigned Reg) {
}
static void removeKillInfo(MachineInstr &MI, unsigned RegNotKilled) {
- for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
- MachineOperand &Op = MI.getOperand(I);
- if (!Op.isReg() || Op.getReg() != RegNotKilled || !Op.isKill())
- continue;
- Op.setIsKill(false);
- }
+ for (MachineOperand &Op : MI.operands())
+ if (Op.isReg() && Op.getReg() == RegNotKilled && Op.isKill())
+ Op.setIsKill(false);
}
/// Returns true if it is unsafe to move a copy instruction from \p UseReg to
@@ -403,10 +400,7 @@ HexagonCopyToCombine::findPotentialNewifiableTFRs(MachineBasicBlock &BB) {
// Mark TFRs that feed a potential new value store as such.
if (TII->mayBeNewStore(MI)) {
// Look for uses of TFR instructions.
- for (unsigned OpdIdx = 0, OpdE = MI.getNumOperands(); OpdIdx != OpdE;
- ++OpdIdx) {
- MachineOperand &Op = MI.getOperand(OpdIdx);
-
+ for (const MachineOperand &Op : MI.operands()) {
// Skip over anything except register uses.
if (!Op.isReg() || !Op.isUse() || !Op.getReg())
continue;
@@ -484,14 +478,13 @@ bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) {
IsConst64Disabled = true;
// Traverse basic blocks.
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
- ++BI) {
+ for (MachineBasicBlock &MBB : MF) {
PotentiallyNewifiableTFR.clear();
- findPotentialNewifiableTFRs(*BI);
+ findPotentialNewifiableTFRs(MBB);
// Traverse instructions in basic block.
- for(MachineBasicBlock::iterator MI = BI->begin(), End = BI->end();
- MI != End;) {
+ for (MachineBasicBlock::iterator MI = MBB.begin(), End = MBB.end();
+ MI != End;) {
MachineInstr &I1 = *MI++;
if (I1.isDebugInstr())
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
index bff596e69efd..12ceac545e9d 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -1404,18 +1404,18 @@ bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB,
// Add callee-saved registers as use.
addCalleeSaveRegistersAsImpOperand(SaveRegsCall, CSI, false, true);
// Add live in registers.
- for (unsigned I = 0; I < CSI.size(); ++I)
- MBB.addLiveIn(CSI[I].getReg());
+ for (const CalleeSavedInfo &I : CSI)
+ MBB.addLiveIn(I.getReg());
return true;
}
- for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
- unsigned Reg = CSI[i].getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
// Add live in registers. We treat eh_return callee saved register r0 - r3
// specially. They are not really callee saved registers as they are not
// supposed to be killed.
bool IsKill = !HRI.isEHReturnCalleeSaveReg(Reg);
- int FI = CSI[i].getFrameIdx();
+ int FI = I.getFrameIdx();
const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
HII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI);
if (IsKill)
@@ -1478,10 +1478,10 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB,
return true;
}
- for (unsigned i = 0; i < CSI.size(); ++i) {
- unsigned Reg = CSI[i].getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
- int FI = CSI[i].getFrameIdx();
+ int FI = I.getFrameIdx();
HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI);
}
@@ -1619,8 +1619,8 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
// (1) For each callee-saved register, add that register and all of its
// sub-registers to SRegs.
LLVM_DEBUG(dbgs() << "Initial CS registers: {");
- for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
- unsigned R = CSI[i].getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned R = I.getReg();
LLVM_DEBUG(dbgs() << ' ' << printReg(R, TRI));
for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR)
SRegs[*SR] = true;
@@ -1720,10 +1720,10 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
LLVM_DEBUG({
dbgs() << "CS information: {";
- for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
- int FI = CSI[i].getFrameIdx();
+ for (const CalleeSavedInfo &I : CSI) {
+ int FI = I.getFrameIdx();
int Off = MFI.getObjectOffset(FI);
- dbgs() << ' ' << printReg(CSI[i].getReg(), TRI) << ":fi#" << FI << ":sp";
+ dbgs() << ' ' << printReg(I.getReg(), TRI) << ":fi#" << FI << ":sp";
if (Off >= 0)
dbgs() << '+';
dbgs() << Off;
@@ -2634,8 +2634,8 @@ bool HexagonFrameLowering::shouldInlineCSR(const MachineFunction &MF,
// Check if CSI only has double registers, and if the registers form
// a contiguous block starting from D8.
BitVector Regs(Hexagon::NUM_TARGET_REGS);
- for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
- unsigned R = CSI[i].getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned R = I.getReg();
if (!Hexagon::DoubleRegsRegClass.contains(R))
return true;
Regs[R] = true;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
index 02da2f29591a..46c1fbc6eeb2 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
@@ -597,19 +597,12 @@ void HexagonGenInsert::dump_map() const {
void HexagonGenInsert::buildOrderingMF(RegisterOrdering &RO) const {
unsigned Index = 0;
- using mf_iterator = MachineFunction::const_iterator;
-
- for (mf_iterator A = MFN->begin(), Z = MFN->end(); A != Z; ++A) {
- const MachineBasicBlock &B = *A;
+ for (const MachineBasicBlock &B : *MFN) {
if (!CMS->BT.reached(&B))
continue;
- using mb_iterator = MachineBasicBlock::const_iterator;
-
- for (mb_iterator I = B.begin(), E = B.end(); I != E; ++I) {
- const MachineInstr *MI = &*I;
- for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineInstr &MI : B) {
+ for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.isDef()) {
Register R = MO.getReg();
assert(MO.getSubReg() == 0 && "Unexpected subregister in definition");
@@ -725,8 +718,7 @@ bool HexagonGenInsert::findNonSelfReference(unsigned VR) const {
void HexagonGenInsert::getInstrDefs(const MachineInstr *MI,
RegisterSet &Defs) const {
- for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
Register R = MO.getReg();
@@ -738,8 +730,7 @@ void HexagonGenInsert::getInstrDefs(const MachineInstr *MI,
void HexagonGenInsert::getInstrUses(const MachineInstr *MI,
RegisterSet &Uses) const {
- for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || !MO.isUse())
continue;
Register R = MO.getReg();
@@ -942,12 +933,11 @@ void HexagonGenInsert::collectInBlock(MachineBasicBlock *B,
// can remove them from the list of available registers once all DT
// successors have been processed.
RegisterSet BlockDefs, InsDefs;
- for (MachineBasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I) {
- MachineInstr *MI = &*I;
+ for (MachineInstr &MI : *B) {
InsDefs.clear();
- getInstrDefs(MI, InsDefs);
+ getInstrDefs(&MI, InsDefs);
// Leave those alone. They are more transparent than "insert".
- bool Skip = MI->isCopy() || MI->isRegSequence();
+ bool Skip = MI.isCopy() || MI.isRegSequence();
if (!Skip) {
// Visit all defined registers, and attempt to find the corresponding
@@ -1458,8 +1448,7 @@ bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) {
for (auto I = B->rbegin(), E = B->rend(); I != E; ++I)
Instrs.push_back(&*I);
- for (auto I = Instrs.begin(), E = Instrs.end(); I != E; ++I) {
- MachineInstr *MI = *I;
+ for (MachineInstr *MI : Instrs) {
unsigned Opc = MI->getOpcode();
// Do not touch lifetime markers. This is why the target-independent DCE
// cannot be used.
@@ -1501,7 +1490,7 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) {
bool Timing = OptTiming, TimingDetail = Timing && OptTimingDetail;
bool Changed = false;
- // Sanity check: one, but not both.
+ // Verify: one, but not both.
assert(!OptSelectAll0 || !OptSelectHas0);
IFMap.clear();
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenMux.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenMux.cpp
index cf4f13fb8c0d..55de02816fb8 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenMux.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenMux.cpp
@@ -328,7 +328,7 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
unsigned MxOpc = getMuxOpcode(*MX.SrcT, *MX.SrcF);
if (!MxOpc)
continue;
- // Basic sanity check: since we are deleting instructions, validate the
+ // Basic correctness check: since we are deleting instructions, validate the
// iterators. There is a possibility that one of Def1 or Def2 is translated
// to "mux" and being considered for other "mux" instructions.
if (!MX.At->getParent() || !MX.Def1->getParent() || !MX.Def2->getParent())
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
index d8d2025c5d27..1a66394e9757 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
@@ -205,16 +205,14 @@ bool HexagonGenPredicate::isConvertibleToPredForm(const MachineInstr *MI) {
}
void HexagonGenPredicate::collectPredicateGPR(MachineFunction &MF) {
- for (MachineFunction::iterator A = MF.begin(), Z = MF.end(); A != Z; ++A) {
- MachineBasicBlock &B = *A;
- for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) {
- MachineInstr *MI = &*I;
- unsigned Opc = MI->getOpcode();
+ for (MachineBasicBlock &B : MF) {
+ for (MachineInstr &MI : B) {
+ unsigned Opc = MI.getOpcode();
switch (Opc) {
case Hexagon::C2_tfrpr:
case TargetOpcode::COPY:
- if (isPredReg(MI->getOperand(1).getReg())) {
- RegisterSubReg RD = MI->getOperand(0);
+ if (isPredReg(MI.getOperand(1).getReg())) {
+ RegisterSubReg RD = MI.getOperand(0);
if (RD.R.isVirtual())
PredGPRs.insert(RD);
}
@@ -411,7 +409,7 @@ bool HexagonGenPredicate::convertToPredForm(MachineInstr *MI) {
NumOps = 2;
}
- // Some sanity: check that def is in operand #0.
+ // Check that def is in operand #0.
MachineOperand &Op0 = MI->getOperand(0);
assert(Op0.isDef());
RegisterSubReg OutR(Op0);
@@ -488,8 +486,8 @@ bool HexagonGenPredicate::eliminatePredCopies(MachineFunction &MF) {
}
}
- for (VectOfInst::iterator I = Erase.begin(), E = Erase.end(); I != E; ++I)
- (*I)->eraseFromParent();
+ for (MachineInstr *MI : Erase)
+ MI->eraseFromParent();
return Changed;
}
@@ -515,11 +513,8 @@ bool HexagonGenPredicate::runOnMachineFunction(MachineFunction &MF) {
Again = false;
VectOfInst Processed, Copy;
- using iterator = VectOfInst::iterator;
-
Copy = PUsers;
- for (iterator I = Copy.begin(), E = Copy.end(); I != E; ++I) {
- MachineInstr *MI = *I;
+ for (MachineInstr *MI : Copy) {
bool Done = convertToPredForm(MI);
if (Done) {
Processed.insert(MI);
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index a4971ad712eb..5d2e1b259449 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -1014,12 +1014,10 @@ bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L,
LLVM_DEBUG(dbgs() << "\nhw_loop head, "
<< printMBBReference(**L->block_begin()));
for (MachineBasicBlock *MBB : L->getBlocks()) {
- for (MachineBasicBlock::iterator
- MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
- const MachineInstr *MI = &*MII;
- if (isInvalidLoopOperation(MI, IsInnerHWLoop)) {
+ for (const MachineInstr &MI : *MBB) {
+ if (isInvalidLoopOperation(&MI, IsInnerHWLoop)) {
LLVM_DEBUG(dbgs() << "\nCannot convert to hw_loop due to:";
- MI->dump(););
+ MI.dump(););
return true;
}
}
@@ -1034,8 +1032,7 @@ bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L,
bool HexagonHardwareLoops::isDead(const MachineInstr *MI,
SmallVectorImpl<MachineInstr *> &DeadPhis) const {
// Examine each operand.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
@@ -1089,8 +1086,7 @@ void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
// It is possible that some DBG_VALUE instructions refer to this
// instruction. Examine each def operand for such references;
// if found, mark the DBG_VALUE as undef (but don't delete it).
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
Register Reg = MO.getReg();
@@ -1123,7 +1119,7 @@ void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,
bool &RecL0used,
bool &RecL1used) {
- // This is just for sanity.
+ // This is just to confirm basic correctness.
assert(L->getHeader() && "Loop without a header?");
bool Changed = false;
@@ -1877,8 +1873,7 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
if (TII->analyzeBranch(*ExitingBlock, TB, FB, Tmp1, false))
return nullptr;
- for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
- MachineBasicBlock *PB = *I;
+ for (MachineBasicBlock *PB : Preds) {
bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp1, false);
if (NotAnalyzed)
return nullptr;
@@ -1960,8 +1955,7 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
TB = FB = nullptr;
- for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
- MachineBasicBlock *PB = *I;
+ for (MachineBasicBlock *PB : Preds) {
if (PB != Latch) {
Tmp2.clear();
bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp2, false);
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index b50a0e29ecae..ed4874baf7c8 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -1006,7 +1006,7 @@ static void packSegmentMask(ArrayRef<int> Mask, ArrayRef<unsigned> OutSegMap,
static bool isPermutation(ArrayRef<int> Mask) {
// Check by adding all numbers only works if there is no overflow.
- assert(Mask.size() < 0x00007FFF && "Sanity failure");
+ assert(Mask.size() < 0x00007FFF && "Overflow failure");
int Sum = 0;
for (int Idx : Mask) {
if (Idx == -1)
@@ -1217,7 +1217,7 @@ OpRef HvxSelector::packs(ShuffleMask SM, OpRef Va, OpRef Vb,
} else if (Seg0 == ~1u) {
Seg0 = SegList[0] != Seg1 ? SegList[0] : SegList[1];
} else {
- assert(Seg1 == ~1u); // Sanity
+ assert(Seg1 == ~1u);
Seg1 = SegList[0] != Seg0 ? SegList[0] : SegList[1];
}
}
@@ -1265,7 +1265,7 @@ OpRef HvxSelector::packs(ShuffleMask SM, OpRef Va, OpRef Vb,
} else {
// BC or DA: this could be done via valign by SegLen.
// Do nothing here, because valign (if possible) will be generated
- // later on (make sure the Seg0 values are as expected, for sanity).
+ // later on (make sure the Seg0 values are as expected).
assert(Seg0 == 1 || Seg0 == 3);
}
}
@@ -1414,7 +1414,7 @@ OpRef HvxSelector::shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results) {
return OpRef::undef(getSingleVT(MVT::i8));
unsigned HalfLen = HwLen / 2;
- assert(isPowerOf2_32(HalfLen)); // Sanity.
+ assert(isPowerOf2_32(HalfLen));
// Handle special case where the output is the same half of the input
// repeated twice, i.e. if Va = AB, then handle the output of AA or BB.
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 29572e3106d1..88effed9f076 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -442,8 +442,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CLI.IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
IsVarArg, IsStructRet, StructAttrFlag, Outs,
OutVals, Ins, DAG);
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
+ for (const CCValAssign &VA : ArgLocs) {
if (VA.isMemLoc()) {
CLI.IsTailCall = false;
break;
@@ -2549,7 +2548,8 @@ HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV,
// Special case for v{8,4,2}i1 (the only boolean vectors legal in Hexagon
// without any coprocessors).
if (ElemWidth == 1) {
- assert(VecWidth == VecTy.getVectorNumElements() && "Sanity failure");
+ assert(VecWidth == VecTy.getVectorNumElements() &&
+ "Vector elements should equal vector width size");
assert(VecWidth == 8 || VecWidth == 4 || VecWidth == 2);
// Check if this is an extract of the lowest bit.
if (IdxN) {
@@ -2863,8 +2863,7 @@ HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
Scale /= 2;
}
- // Another sanity check. At this point there should only be two words
- // left, and Scale should be 2.
+ // At this point there should only be two words left, and Scale should be 2.
assert(Scale == 2 && Words[IdxW].size() == 2);
SDValue WW = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 8900fca8bb78..f7237f496aee 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -9,6 +9,7 @@
#include "HexagonISelLowering.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/Support/CommandLine.h"
@@ -1846,16 +1847,18 @@ HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = MemN->getChain();
SDValue Base0 = MemN->getBasePtr();
SDValue Base1 = DAG.getMemBasePlusOffset(Base0, TypeSize::Fixed(HwLen), dl);
+ unsigned MemOpc = MemN->getOpcode();
MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
if (MachineMemOperand *MMO = MemN->getMemOperand()) {
MachineFunction &MF = DAG.getMachineFunction();
- MOp0 = MF.getMachineMemOperand(MMO, 0, HwLen);
- MOp1 = MF.getMachineMemOperand(MMO, HwLen, HwLen);
+ uint64_t MemSize = (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE)
+ ? (uint64_t)MemoryLocation::UnknownSize
+ : HwLen;
+ MOp0 = MF.getMachineMemOperand(MMO, 0, MemSize);
+ MOp1 = MF.getMachineMemOperand(MMO, HwLen, MemSize);
}
- unsigned MemOpc = MemN->getOpcode();
-
if (MemOpc == ISD::LOAD) {
assert(cast<LoadSDNode>(Op)->isUnindexed());
SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0);
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 76220eff4d51..b6984d40f78e 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -193,9 +193,7 @@ static inline void parseOperands(const MachineInstr &MI,
Defs.clear();
Uses.clear();
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
-
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;
@@ -1644,8 +1642,7 @@ bool HexagonInstrInfo::ClobbersPredicate(MachineInstr &MI,
bool SkipDead) const {
const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
- for (unsigned oper = 0; oper < MI.getNumOperands(); ++oper) {
- MachineOperand MO = MI.getOperand(oper);
+ for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg()) {
if (!MO.isDef())
continue;
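Several hunks here (parseOperands, ClobbersPredicate) replace index-based operand loops with a range-based loop over MI.operands(); the old ClobbersPredicate loop also copied each MachineOperand by value, which the reference form avoids. A minimal sketch of the idiom, assuming the usual CodeGen headers (the helper name countRegDefs is illustrative, not part of this change):

  #include "llvm/CodeGen/MachineInstr.h"
  #include "llvm/CodeGen/MachineOperand.h"

  // Walk the operand list by const reference instead of indexing.
  static unsigned countRegDefs(const llvm::MachineInstr &MI) {
    unsigned NumDefs = 0;
    for (const llvm::MachineOperand &MO : MI.operands())
      if (MO.isReg() && MO.isDef())
        ++NumDefs;
    return NumDefs;
  }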
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp
index 9507de95231f..987c4a5fa6c4 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp
@@ -109,8 +109,7 @@ void llvm::HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI,
assert(MCI->getOpcode() == static_cast<unsigned>(MI->getOpcode()) &&
"MCI opcode should have been set on construction");
- for (unsigned i = 0, e = MI->getNumOperands(); i < e; i++) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
MCOperand MCO;
bool MustExtend = MO.getTargetFlags() & HexagonII::HMOTF_ConstExtended;
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPeephole.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
index fc31139e13ce..1ff248200572 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -120,16 +120,12 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
if (DisableHexagonPeephole) return false;
// Loop over all of the basic blocks.
- for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
- MBBb != MBBe; ++MBBb) {
- MachineBasicBlock *MBB = &*MBBb;
+ for (MachineBasicBlock &MBB : MF) {
PeepholeMap.clear();
PeepholeDoubleRegsMap.clear();
// Traverse the basic block.
- for (auto I = MBB->begin(), E = MBB->end(), NextI = I; I != E; I = NextI) {
- NextI = std::next(I);
- MachineInstr &MI = *I;
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
// Look for sign extends:
// %170 = SXTW %166
if (!DisableOptSZExt && MI.getOpcode() == Hexagon::A2_sxtw) {
@@ -274,11 +270,11 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
if (NewOp) {
Register PSrc = MI.getOperand(PR).getReg();
if (unsigned POrig = PeepholeMap.lookup(PSrc)) {
- BuildMI(*MBB, MI.getIterator(), MI.getDebugLoc(),
- QII->get(NewOp), MI.getOperand(0).getReg())
- .addReg(POrig)
- .add(MI.getOperand(S2))
- .add(MI.getOperand(S1));
+ BuildMI(MBB, MI.getIterator(), MI.getDebugLoc(), QII->get(NewOp),
+ MI.getOperand(0).getReg())
+ .addReg(POrig)
+ .add(MI.getOperand(S2))
+ .add(MI.getOperand(S1));
MRI->clearKillFlags(POrig);
MI.eraseFromParent();
}
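The HexagonPeephole loop above now uses llvm::make_early_inc_range, the STLExtras helper for erasing the current element while iterating: the iterator is advanced before the loop body runs, so MI.eraseFromParent() cannot invalidate it. A small sketch of the same idiom, assuming llvm/ADT/STLExtras.h is available (removeDebugInstrs is an illustrative name, not from this patch):

  #include "llvm/ADT/STLExtras.h"
  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/MachineInstr.h"

  // Erase instructions while walking the block; the early-increment range
  // keeps the traversal valid after eraseFromParent().
  static void removeDebugInstrs(llvm::MachineBasicBlock &MBB) {
    for (llvm::MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
      if (MI.isDebugInstr())
        MI.eraseFromParent();
    }
  }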
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp
index 93ba277b0c9d..2c5c64cfcfc6 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp
@@ -400,8 +400,7 @@ bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
unsigned Acc = 0; // Value accumulator.
unsigned Shift = 0;
- for (InstrGroup::iterator I = OG.begin(), E = OG.end(); I != E; ++I) {
- MachineInstr *MI = *I;
+ for (MachineInstr *MI : OG) {
const MachineMemOperand &MMO = getStoreTarget(MI);
MachineOperand &SO = MI->getOperand(2); // Source.
assert(SO.isImm() && "Expecting an immediate operand");
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
index 87b1c43961d7..ecb2f88d8096 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -305,8 +305,7 @@ void HexagonSubtarget::CallMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
VRegHoldingReg[MI->getOperand(0).getReg()] = MI->getOperand(1).getReg();
LastVRegUse.erase(MI->getOperand(1).getReg());
} else {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg())
continue;
if (MO.isUse() && !MI->isCopy() &&
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
index 897fb209a8bf..ea2798a3b44e 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
@@ -749,7 +749,6 @@ auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool {
WithMaxAlign.ValTy, Adjust);
int Diff = Start - (OffAtMax + Adjust);
AlignVal = HVC.getConstInt(Diff);
- // Sanity.
assert(Diff >= 0);
assert(static_cast<decltype(MinNeeded.value())>(Diff) < MinNeeded.value());
} else {
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiDelaySlotFiller.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiDelaySlotFiller.cpp
index b9e577d201f9..cafe93bf8f4b 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiDelaySlotFiller.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiDelaySlotFiller.cpp
@@ -51,9 +51,8 @@ struct Filler : public MachineFunctionPass {
TRI = Subtarget.getRegisterInfo();
bool Changed = false;
- for (MachineFunction::iterator FI = MF.begin(), FE = MF.end(); FI != FE;
- ++FI)
- Changed |= runOnMachineBasicBlock(*FI);
+ for (MachineBasicBlock &MBB : MF)
+ Changed |= runOnMachineBasicBlock(MBB);
return Changed;
}
@@ -200,8 +199,7 @@ bool Filler::delayHasHazard(MachineBasicBlock::instr_iterator MI, bool &SawLoad,
assert((!MI->isCall() && !MI->isReturn()) &&
"Cannot put calls or returns in delay slot.");
- for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
- const MachineOperand &MO = MI->getOperand(I);
+ for (const MachineOperand &MO : MI->operands()) {
unsigned Reg;
if (!MO.isReg() || !(Reg = MO.getReg()))
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiFrameLowering.cpp
index 3a2d5030775e..3644eafe4353 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiFrameLowering.cpp
@@ -65,17 +65,14 @@ void LanaiFrameLowering::replaceAdjDynAllocPseudo(MachineFunction &MF) const {
*static_cast<const LanaiInstrInfo *>(STI.getInstrInfo());
unsigned MaxCallFrameSize = MF.getFrameInfo().getMaxCallFrameSize();
- for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); MBB != E;
- ++MBB) {
- MachineBasicBlock::iterator MBBI = MBB->begin();
- while (MBBI != MBB->end()) {
- MachineInstr &MI = *MBBI++;
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
if (MI.getOpcode() == Lanai::ADJDYNALLOC) {
DebugLoc DL = MI.getDebugLoc();
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
- BuildMI(*MBB, MI, DL, LII.get(Lanai::ADD_I_LO), Dst)
+ BuildMI(MBB, MI, DL, LII.get(Lanai::ADD_I_LO), Dst)
.addReg(Src)
.addImm(MaxCallFrameSize);
MI.eraseFromParent();
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
index 21d035c7ee9c..4217b8509676 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
@@ -467,8 +467,7 @@ static MachineInstr *canFoldIntoSelect(Register Reg,
return nullptr;
// Check if MI has any non-dead defs or physreg uses. This also detects
// predicated instructions which will be reading SR.
- for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 1)) {
// Reject frame index operands.
if (MO.isFI() || MO.isCPI() || MO.isJTI())
return nullptr;
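canFoldIntoSelect starts at operand 1, so the rewrite uses llvm::drop_begin, which adapts a range to skip its first N elements (N defaults to 1); the MipsAsmPrinter and MipsSEISelLowering hunks further down use the same helper. A tiny sketch under the assumption that llvm/ADT/STLExtras.h is available (main and the sample values are purely illustrative):

  #include "llvm/ADT/STLExtras.h"
  #include <cstdio>
  #include <vector>

  int main() {
    std::vector<int> Vals = {1, 2, 3, 4};
    // Skip the first element, mirroring drop_begin(MI->operands(), 1).
    for (int V : llvm::drop_begin(Vals))
      std::printf("%d ", V); // prints "2 3 4 "
    return 0;
  }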
diff --git a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiMCInstLower.cpp
index 743f4f7c6e2f..479c0b1f0358 100644
--- a/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiMCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Lanai/LanaiMCInstLower.cpp
@@ -93,9 +93,7 @@ MCOperand LanaiMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
void LanaiMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
- for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
- const MachineOperand &MO = MI->getOperand(I);
-
+ for (const MachineOperand &MO : MI->operands()) {
MCOperand MCOp;
switch (MO.getType()) {
case MachineOperand::MO_Register:
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
index a83a5d2dfcc9..2a77a150f9aa 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -211,8 +211,8 @@ bool MSP430FrameLowering::restoreCalleeSavedRegisters(
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- for (unsigned i = 0, e = CSI.size(); i != e; ++i)
- BuildMI(MBB, MI, DL, TII.get(MSP430::POP16r), CSI[i].getReg());
+ for (const CalleeSavedInfo &I : CSI)
+ BuildMI(MBB, MI, DL, TII.get(MSP430::POP16r), I.getReg());
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
index 1e57f33386e6..52c037de7660 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
@@ -115,9 +115,7 @@ LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const {
void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
-
+ for (const MachineOperand &MO : MI->operands()) {
MCOperand MCOp;
switch (MO.getType()) {
default:
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16FrameLowering.cpp
index fefa1134b021..622f2039f9e4 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/Mips16FrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/Mips16FrameLowering.cpp
@@ -72,10 +72,9 @@ void Mips16FrameLowering::emitPrologue(MachineFunction &MF,
if (!CSI.empty()) {
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
- for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
- E = CSI.end(); I != E; ++I) {
- int64_t Offset = MFI.getObjectOffset(I->getFrameIdx());
- unsigned Reg = I->getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
+ unsigned Reg = I.getReg();
unsigned DReg = MRI->getDwarfRegNum(Reg, true);
unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createOffset(nullptr, DReg, Offset));
@@ -119,13 +118,13 @@ bool Mips16FrameLowering::spillCalleeSavedRegisters(
// will be saved with the "save" instruction
// during emitPrologue
//
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ for (const CalleeSavedInfo &I : CSI) {
// Add the callee-saved register as live-in. Do not add if the register is
// RA and return address is taken, because it has already been added in
// method MipsTargetLowering::lowerRETURNADDR.
// It's killed at the spill, unless the register is RA and return address
// is taken.
- unsigned Reg = CSI[i].getReg();
+ unsigned Reg = I.getReg();
bool IsRAAndRetAddrIsTaken = (Reg == Mips::RA)
&& MF->getFrameInfo().isReturnAddressTaken();
if (!IsRAAndRetAddrIsTaken)
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index 6d3f3adb2b7a..5d026785b921 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -163,9 +163,8 @@ static void emitDirectiveRelocJalr(const MachineInstr &MI,
TargetMachine &TM,
MCStreamer &OutStreamer,
const MipsSubtarget &Subtarget) {
- for (unsigned int I = MI.getDesc().getNumOperands(), E = MI.getNumOperands();
- I < E; ++I) {
- MachineOperand MO = MI.getOperand(I);
+ for (const MachineOperand &MO :
+ llvm::drop_begin(MI.operands(), MI.getDesc().getNumOperands())) {
if (MO.isMCSymbol() && (MO.getTargetFlags() & MipsII::MO_JALR)) {
MCSymbol *Callee = MO.getMCSymbol();
if (Callee && !Callee->getName().empty()) {
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
index 8e619549f01c..491d379bfe0b 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -637,8 +637,8 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
// has any inline assembly in it. If so, we have to be conservative about
// alignment assumptions, as we don't know for sure the size of any
// instructions in the inline assembly.
- for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
- computeBlockSize(&*I);
+ for (MachineBasicBlock &MBB : *MF)
+ computeBlockSize(&MBB);
// Compute block offsets.
adjustBBOffsetsAfter(&MF->front());
@@ -730,8 +730,8 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
continue;
// Scan the instructions for constant pool operands.
- for (unsigned op = 0, e = MI.getNumOperands(); op != e; ++op)
- if (MI.getOperand(op).isCPI()) {
+ for (const MachineOperand &MO : MI.operands())
+ if (MO.isCPI()) {
// We found one. The addressing mode tells us the max displacement
// from the PC that this instruction permits.
@@ -759,7 +759,7 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
break;
}
// Remember that this is a user of a CP entry.
- unsigned CPI = MI.getOperand(op).getIndex();
+ unsigned CPI = MO.getIndex();
MachineInstr *CPEMI = CPEMIs[CPI];
unsigned MaxOffs = ((1 << Bits)-1) * Scale;
unsigned LongFormMaxOffs = ((1 << LongFormBits)-1) * LongFormScale;
@@ -1066,9 +1066,9 @@ int MipsConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset)
// Point the CPUser node to the replacement
U.CPEMI = CPEs[i].CPEMI;
// Change the CPI in the instruction operand to refer to the clone.
- for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j)
- if (UserMI->getOperand(j).isCPI()) {
- UserMI->getOperand(j).setIndex(CPEs[i].CPI);
+ for (MachineOperand &MO : UserMI->operands())
+ if (MO.isCPI()) {
+ MO.setIndex(CPEs[i].CPI);
break;
}
// Adjust the refcount of the clone...
@@ -1122,9 +1122,9 @@ int MipsConstantIslands::findLongFormInRangeCPEntry
// Point the CPUser node to the replacement
U.CPEMI = CPEs[i].CPEMI;
// Change the CPI in the instruction operand to refer to the clone.
- for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j)
- if (UserMI->getOperand(j).isCPI()) {
- UserMI->getOperand(j).setIndex(CPEs[i].CPI);
+ for (MachineOperand &MO : UserMI->operands())
+ if (MO.isCPI()) {
+ MO.setIndex(CPEs[i].CPI);
break;
}
// Adjust the refcount of the clone...
@@ -1392,9 +1392,9 @@ bool MipsConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
adjustBBOffsetsAfter(&*--NewIsland->getIterator());
// Finally, change the CPI in the instruction operand to be ID.
- for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
- if (UserMI->getOperand(i).isCPI()) {
- UserMI->getOperand(i).setIndex(ID);
+ for (MachineOperand &MO : UserMI->operands())
+ if (MO.isCPI()) {
+ MO.setIndex(ID);
break;
}
@@ -1633,10 +1633,10 @@ MipsConstantIslands::fixupConditionalBr(ImmBranch &Br) {
void MipsConstantIslands::prescanForConstants() {
unsigned J = 0;
(void)J;
- for (MachineFunction::iterator B =
- MF->begin(), E = MF->end(); B != E; ++B) {
- for (MachineBasicBlock::instr_iterator I =
- B->instr_begin(), EB = B->instr_end(); I != EB; ++I) {
+ for (MachineBasicBlock &B : *MF) {
+ for (MachineBasicBlock::instr_iterator I = B.instr_begin(),
+ EB = B.instr_end();
+ I != EB; ++I) {
switch(I->getDesc().getOpcode()) {
case Mips::LwConstant32: {
PrescannedForConstants = true;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
index c2e3d7393a6d..2d27d7553de6 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -218,9 +218,8 @@ namespace {
bool runOnMachineFunction(MachineFunction &F) override {
TM = &F.getTarget();
bool Changed = false;
- for (MachineFunction::iterator FI = F.begin(), FE = F.end();
- FI != FE; ++FI)
- Changed |= runOnMachineBasicBlock(*FI);
+ for (MachineBasicBlock &MBB : F)
+ Changed |= runOnMachineBasicBlock(MBB);
// This pass invalidates liveness information when it reorders
// instructions to fill delay slot. Without this, -verify-machineinstrs
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsExpandPseudo.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsExpandPseudo.cpp
index f72dc1da4131..31180d5a23ef 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsExpandPseudo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsExpandPseudo.cpp
@@ -896,9 +896,8 @@ bool MipsExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
TII = STI->getInstrInfo();
bool Modified = false;
- for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
- ++MFI)
- Modified |= expandMBB(*MFI);
+ for (MachineBasicBlock &MBB : MF)
+ Modified |= expandMBB(MBB);
if (Modified)
MF.RenumberBlocks();
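Many of these hunks (the Lanai delay slot filler, MipsDelaySlotFiller, MipsExpandPseudo, and the PPC passes below) make the same change: a MachineFunction is itself a range of MachineBasicBlocks, so the explicit begin()/end() iterator loops collapse to a range-based for. A minimal sketch, assuming the usual CodeGen headers (countInstrs is an illustrative name):

  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/MachineFunction.h"

  // Iterate blocks directly; MBB.size() is the number of instructions.
  static unsigned countInstrs(llvm::MachineFunction &MF) {
    unsigned NumInstrs = 0;
    for (llvm::MachineBasicBlock &MBB : MF)
      NumInstrs += MBB.size();
    return NumInstrs;
  }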
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/Mips/MipsFrameLowering.h
index 612b2b712fa8..710a3d40c38e 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsFrameLowering.h
@@ -34,7 +34,10 @@ public:
bool hasBP(const MachineFunction &MF) const;
- bool isFPCloseToIncomingSP() const override { return false; }
+ bool allocateScavengingFrameIndexesNearIncomingSP(
+ const MachineFunction &MF) const override {
+ return false;
+ }
bool enableShrinkWrapping(const MachineFunction &MF) const override {
return true;
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsMCInstLower.cpp
index 66e04bda2af3..7b58cb90ab87 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsMCInstLower.cpp
@@ -318,8 +318,7 @@ void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
MCOperand MCOp = LowerOperand(MO);
if (MCOp.isValid())
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
index bb4b9c6fa6a7..193d071447ff 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -452,10 +452,9 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
// Iterate over list of callee-saved registers and emit .cfi_offset
// directives.
- for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
- E = CSI.end(); I != E; ++I) {
- int64_t Offset = MFI.getObjectOffset(I->getFrameIdx());
- unsigned Reg = I->getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
+ unsigned Reg = I.getReg();
// If Reg is a double precision register, emit two cfa_offsets,
// one for each of the paired single precision registers.
@@ -796,13 +795,13 @@ bool MipsSEFrameLowering::spillCalleeSavedRegisters(
MachineFunction *MF = MBB.getParent();
const TargetInstrInfo &TII = *STI.getInstrInfo();
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ for (const CalleeSavedInfo &I : CSI) {
// Add the callee-saved register as live-in. Do not add if the register is
// RA and return address is taken, because it has already been added in
// method MipsTargetLowering::lowerRETURNADDR.
// It's killed at the spill, unless the register is RA and return address
// is taken.
- unsigned Reg = CSI[i].getReg();
+ unsigned Reg = I.getReg();
bool IsRAAndRetAddrIsTaken = (Reg == Mips::RA || Reg == Mips::RA_64)
&& MF->getFrameInfo().isReturnAddressTaken();
if (!IsRAAndRetAddrIsTaken)
@@ -831,8 +830,7 @@ bool MipsSEFrameLowering::spillCalleeSavedRegisters(
// Insert the spill to the stack frame.
bool IsKill = !IsRAAndRetAddrIsTaken;
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.storeRegToStackSlot(MBB, MI, Reg, IsKill,
- CSI[i].getFrameIdx(), RC, TRI);
+ TII.storeRegToStackSlot(MBB, MI, Reg, IsKill, I.getFrameIdx(), RC, TRI);
}
return true;
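The callee-saved-register loops in Mips16FrameLowering, MipsSEFrameLowering, MSP430FrameLowering, and PPCFrameLowering all switch from indexing CSI[i] to taking each CalleeSavedInfo entry by const reference. A condensed sketch of that shape, assuming llvm/CodeGen/MachineFrameInfo.h (addCSRLiveIns is an illustrative helper, not part of the patch):

  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/MachineFrameInfo.h"
  #include <vector>

  // Mark every callee-saved register live-in to the entry block.
  static void addCSRLiveIns(llvm::MachineBasicBlock &Entry,
                            const std::vector<llvm::CalleeSavedInfo> &CSI) {
    for (const llvm::CalleeSavedInfo &I : CSI) {
      unsigned Reg = I.getReg();
      Entry.addLiveIn(Reg);
    }
  }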
diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index 1fe6ab09804b..40b215a8204c 100644
--- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -3581,8 +3581,8 @@ MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI,
MachineInstrBuilder MIB =
BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::LH : Mips::LH64), Rt);
- for (unsigned i = 1; i < MI.getNumOperands(); i++)
- MIB.add(MI.getOperand(i));
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
+ MIB.add(MO);
if(!UsingMips32) {
Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR32RegClass);
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index aab6d2034f11..c35e67d6726f 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -130,10 +130,8 @@ VisitGlobalVariableForEmission(const GlobalVariable *GV,
for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
DiscoverDependentGlobals(GV->getOperand(i), Others);
- for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(),
- E = Others.end();
- I != E; ++I)
- VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
+ for (const GlobalVariable *GV : Others)
+ VisitGlobalVariableForEmission(GV, Order, Visited, Visiting);
// Now we can visit ourself
Order.push_back(GV);
@@ -699,35 +697,33 @@ static bool useFuncSeen(const Constant *C,
void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
DenseMap<const Function *, bool> seenMap;
- for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
- const Function *F = &*FI;
-
- if (F->getAttributes().hasFnAttr("nvptx-libcall-callee")) {
- emitDeclaration(F, O);
+ for (const Function &F : M) {
+ if (F.getAttributes().hasFnAttr("nvptx-libcall-callee")) {
+ emitDeclaration(&F, O);
continue;
}
- if (F->isDeclaration()) {
- if (F->use_empty())
+ if (F.isDeclaration()) {
+ if (F.use_empty())
continue;
- if (F->getIntrinsicID())
+ if (F.getIntrinsicID())
continue;
- emitDeclaration(F, O);
+ emitDeclaration(&F, O);
continue;
}
- for (const User *U : F->users()) {
+ for (const User *U : F.users()) {
if (const Constant *C = dyn_cast<Constant>(U)) {
if (usedInGlobalVarDef(C)) {
// The use is in the initialization of a global variable
// that is a function pointer, so print a declaration
// for the original function
- emitDeclaration(F, O);
+ emitDeclaration(&F, O);
break;
}
// Emit a declaration of this function if the function that
// uses this constant expr has already been seen.
if (useFuncSeen(C, seenMap)) {
- emitDeclaration(F, O);
+ emitDeclaration(&F, O);
break;
}
}
@@ -746,11 +742,11 @@ void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
// appearing in the module before the callee. so print out
// a declaration for the callee.
if (seenMap.find(caller) != seenMap.end()) {
- emitDeclaration(F, O);
+ emitDeclaration(&F, O);
break;
}
}
- seenMap[F] = true;
+ seenMap[&F] = true;
}
}
@@ -887,33 +883,11 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
GlobalsEmitted = true;
}
- // XXX Temproarily remove global variables so that doFinalization() will not
- // emit them again (global variables are emitted at beginning).
-
- Module::GlobalListType &global_list = M.getGlobalList();
- int i, n = global_list.size();
- GlobalVariable **gv_array = new GlobalVariable *[n];
-
- // first, back-up GlobalVariable in gv_array
- i = 0;
- for (Module::global_iterator I = global_list.begin(), E = global_list.end();
- I != E; ++I)
- gv_array[i++] = &*I;
-
- // second, empty global_list
- while (!global_list.empty())
- global_list.remove(global_list.begin());
-
// call doFinalization
bool ret = AsmPrinter::doFinalization(M);
- // now we restore global variables
- for (i = 0; i < n; i++)
- global_list.insert(global_list.end(), gv_array[i]);
-
clearAnnotationCache(&M);
- delete[] gv_array;
// Close the last emitted section
if (HasDebugInfo) {
static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer())
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 5d680e731e4a..2a3a38d7b2f1 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -306,6 +306,11 @@ public:
std::string getVirtualRegisterName(unsigned) const;
const MCSymbol *getFunctionFrameSymbol() const override;
+
+ // Make emitGlobalVariable() no-op for NVPTX.
+ // Global variables have been already emitted by the time the base AsmPrinter
+ // attempts to do so in doFinalization() (see NVPTXAsmPrinter::emitGlobals()).
+ void emitGlobalVariable(const GlobalVariable *GV) override {}
};
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
index a8a43cee9ab7..34b9dfe87cc2 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
@@ -72,8 +72,7 @@ bool NVPTXAssignValidGlobalNames::runOnModule(Module &M) {
std::string NVPTXAssignValidGlobalNames::cleanUpName(StringRef Name) {
std::string ValidName;
raw_string_ostream ValidNameStream(ValidName);
- for (unsigned I = 0, E = Name.size(); I != E; ++I) {
- char C = Name[I];
+ for (char C : Name) {
if (C == '.' || C == '@') {
ValidNameStream << "_$_";
} else {
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index e404cead344b..f4934f0bc20b 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -56,23 +56,16 @@ bool NVPTXReplaceImageHandles::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
InstrsToRemove.clear();
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
- ++BI) {
- for (MachineBasicBlock::iterator I = (*BI).begin(), E = (*BI).end();
- I != E; ++I) {
- MachineInstr &MI = *I;
+ for (MachineBasicBlock &MBB : MF)
+ for (MachineInstr &MI : MBB)
Changed |= processInstr(MI);
- }
- }
// Now clean up any handle-access instructions
// This is needed in debug mode when code cleanup passes are not executed,
// but we need the handle access to be eliminated because they are not
// valid instructions when image handles are disabled.
- for (DenseSet<MachineInstr *>::iterator I = InstrsToRemove.begin(),
- E = InstrsToRemove.end(); I != E; ++I) {
- (*I)->eraseFromParent();
- }
+ for (MachineInstr *MI : InstrsToRemove)
+ MI->eraseFromParent();
return Changed;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/P10InstrResources.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/P10InstrResources.td
index f43ba00ec373..f3ae0010ad8e 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/P10InstrResources.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/P10InstrResources.td
@@ -626,7 +626,9 @@ def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read],
// 5 Cycles Fixed-Point and BCD operations, 3 input operands
def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read, P10DX_Read],
(instrs
+ BCDADD_rec,
BCDS_rec,
+ BCDSUB_rec,
BCDTRUNC_rec,
VADDECUQ,
VADDEUQM,
@@ -1974,7 +1976,7 @@ def : InstRW<[P10W_SX, P10W_DISP_ANY],
ICBLQ,
ICBTLS,
ICCCI,
- LA,
+ LA, LA8,
LDMX,
MFDCR,
MFPMR,
@@ -2073,3 +2075,4 @@ def : InstRW<[P10W_vMU_7C, P10W_DISP_ANY, P10vMU_Read, P10vMU_Read, P10vMU_Read]
VMSUMUHM,
VMSUMUHS
)>;
+
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/P9InstrResources.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/P9InstrResources.td
index c4f4a2b3d796..f7c049951c54 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -151,6 +151,7 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
(instregex "ADD(4|8)(TLS)?(_)?$"),
(instregex "NEG(8)?(O)?$"),
(instregex "ADDI(S)?toc(HA|L)(8)?$"),
+ (instregex "LA(8)?$"),
COPY,
MCRF,
MCRXRX,
@@ -165,7 +166,6 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
SRADI_32,
RLDIC,
RFEBB,
- LA,
TBEGIN,
TRECHKPT,
NOP,
@@ -624,7 +624,9 @@ def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
BCDS_rec,
BCDTRUNC_rec,
BCDUS_rec,
- BCDUTRUNC_rec
+ BCDUTRUNC_rec,
+ BCDADD_rec,
+ BCDSUB_rec
)>;
// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.td
index a1ff20bb3612..422bd11dca52 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPC.td
@@ -203,6 +203,22 @@ def FeatureLogicalFusion :
SubtargetFeature<"fuse-logical", "HasLogicalFusion", "true",
"Target supports Logical Operations fusion",
[FeatureFusion]>;
+def FeatureSha3Fusion :
+ SubtargetFeature<"fuse-sha3", "HasSha3Fusion", "true",
+ "Target supports SHA3 assist fusion",
+ [FeatureFusion]>;
+def FeatureCompareFusion:
+ SubtargetFeature<"fuse-cmp", "HasCompareFusion", "true",
+ "Target supports Comparison Operations fusion",
+ [FeatureFusion]>;
+def FeatureWideImmFusion:
+ SubtargetFeature<"fuse-wideimm", "HasWideImmFusion", "true",
+ "Target supports Wide-Immediate fusion",
+ [FeatureFusion]>;
+def FeatureZeroMoveFusion:
+ SubtargetFeature<"fuse-zeromove", "HasZeroMoveFusion", "true",
+ "Target supports move to SPR with branch fusion",
+ [FeatureFusion]>;
def FeatureUnalignedFloats :
SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess",
"true", "CPU does not trap on unaligned FP access">;
@@ -393,7 +409,7 @@ def ProcessorFeatures {
// still exist with the exception of those we know are Power9 specific.
list<SubtargetFeature> FusionFeatures = [
FeatureStoreFusion, FeatureAddLogicalFusion, FeatureLogicalAddFusion,
- FeatureLogicalFusion, FeatureArithAddFusion
+ FeatureLogicalFusion, FeatureArithAddFusion, FeatureSha3Fusion,
];
list<SubtargetFeature> P10AdditionalFeatures =
!listconcat(FusionFeatures, [
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index a76963abb8e4..16e3b2b85c2e 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -875,18 +875,19 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, TmpInst);
return;
}
- case PPC::ADDItoc: {
+ case PPC::ADDItoc:
+ case PPC::ADDItoc8: {
assert(IsAIX && TM.getCodeModel() == CodeModel::Small &&
- "Operand only valid in AIX 32 bit mode");
+ "PseudoOp only valid for small code model AIX");
- // Transform %rN = ADDItoc @op1, %r2.
+ // Transform %rN = ADDItoc/8 @op1, %r2.
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
// Change the opcode to load address.
- TmpInst.setOpcode(PPC::LA);
+ TmpInst.setOpcode((!IsPPC64) ? (PPC::LA) : (PPC::LA8));
const MachineOperand &MO = MI->getOperand(1);
- assert(MO.isGlobal() && "Invalid operand for ADDItoc.");
+ assert(MO.isGlobal() && "Invalid operand for ADDItoc[8].");
// Map the operand to its corresponding MCSymbol.
const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
index fa6713dcca80..4cac0e3551f6 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -120,16 +120,13 @@ unsigned PPCBSel::ComputeBlockSizes(MachineFunction &Fn) {
static_cast<const PPCInstrInfo *>(Fn.getSubtarget().getInstrInfo());
unsigned FuncSize = GetInitialOffset(Fn);
- for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
- ++MFI) {
- MachineBasicBlock *MBB = &*MFI;
-
+ for (MachineBasicBlock &MBB : Fn) {
// The end of the previous block may have extra nops if this block has an
// alignment requirement.
- if (MBB->getNumber() > 0) {
- unsigned AlignExtra = GetAlignmentAdjustment(*MBB, FuncSize);
+ if (MBB.getNumber() > 0) {
+ unsigned AlignExtra = GetAlignmentAdjustment(MBB, FuncSize);
- auto &BS = BlockSizes[MBB->getNumber()-1];
+ auto &BS = BlockSizes[MBB.getNumber()-1];
BS.first += AlignExtra;
BS.second = AlignExtra;
@@ -138,10 +135,10 @@ unsigned PPCBSel::ComputeBlockSizes(MachineFunction &Fn) {
unsigned BlockSize = 0;
unsigned UnalignedBytesRemaining = 0;
- for (MachineInstr &MI : *MBB) {
+ for (MachineInstr &MI : MBB) {
unsigned MINumBytes = TII->getInstSizeInBytes(MI);
if (MI.isInlineAsm() && (FirstImpreciseBlock < 0))
- FirstImpreciseBlock = MBB->getNumber();
+ FirstImpreciseBlock = MBB.getNumber();
if (TII->isPrefixed(MI.getOpcode())) {
NumPrefixed++;
@@ -171,7 +168,7 @@ unsigned PPCBSel::ComputeBlockSizes(MachineFunction &Fn) {
BlockSize += MINumBytes;
}
- BlockSizes[MBB->getNumber()].first = BlockSize;
+ BlockSizes[MBB.getNumber()].first = BlockSize;
FuncSize += BlockSize;
}
@@ -181,16 +178,13 @@ unsigned PPCBSel::ComputeBlockSizes(MachineFunction &Fn) {
/// Modify the basic block align adjustment.
void PPCBSel::modifyAdjustment(MachineFunction &Fn) {
unsigned Offset = GetInitialOffset(Fn);
- for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
- ++MFI) {
- MachineBasicBlock *MBB = &*MFI;
-
- if (MBB->getNumber() > 0) {
- auto &BS = BlockSizes[MBB->getNumber()-1];
+ for (MachineBasicBlock &MBB : Fn) {
+ if (MBB.getNumber() > 0) {
+ auto &BS = BlockSizes[MBB.getNumber()-1];
BS.first -= BS.second;
Offset -= BS.second;
- unsigned AlignExtra = GetAlignmentAdjustment(*MBB, Offset);
+ unsigned AlignExtra = GetAlignmentAdjustment(MBB, Offset);
BS.first += AlignExtra;
BS.second = AlignExtra;
@@ -198,7 +192,7 @@ void PPCBSel::modifyAdjustment(MachineFunction &Fn) {
Offset += AlignExtra;
}
- Offset += BlockSizes[MBB->getNumber()].first;
+ Offset += BlockSizes[MBB.getNumber()].first;
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
index b9518d6d7064..b1f5bdd885cd 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -81,8 +81,7 @@ FunctionPass *llvm::createPPCCTRLoopsVerify() {
}
static bool clobbersCTR(const MachineInstr &MI) {
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg()) {
if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8))
return true;
@@ -167,18 +166,16 @@ bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) {
// Verify that all bdnz/bdz instructions are dominated by a loop mtctr before
// any other instructions that might clobber the ctr register.
- for (MachineFunction::iterator I = MF.begin(), IE = MF.end();
- I != IE; ++I) {
- MachineBasicBlock *MBB = &*I;
- if (!MDT->isReachableFromEntry(MBB))
+ for (MachineBasicBlock &MBB : MF) {
+ if (!MDT->isReachableFromEntry(&MBB))
continue;
- for (MachineBasicBlock::iterator MII = MBB->getFirstTerminator(),
- MIIE = MBB->end(); MII != MIIE; ++MII) {
+ for (MachineBasicBlock::iterator MII = MBB.getFirstTerminator(),
+ MIIE = MBB.end(); MII != MIIE; ++MII) {
unsigned Opc = MII->getOpcode();
if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ ||
Opc == PPC::BDZ8 || Opc == PPC::BDZ)
- if (!verifyCTRBranch(MBB, MII))
+ if (!verifyCTRBranch(&MBB, MII))
llvm_unreachable("Invalid PPC CTR loop!");
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
index be4c9dd60b00..a9794ddd0566 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
@@ -74,8 +74,7 @@ bool PPCExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
TRI = &TII->getRegisterInfo();
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
- MachineBasicBlock &MBB = *I;
+ for (MachineBasicBlock &MBB : MF) {
for (MachineBasicBlock::iterator MBBI = MBB.begin(), MBBE = MBB.end();
MBBI != MBBE;) {
MachineInstr &MI = *MBBI;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index fc3c7ec35b8d..3ca563fee970 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -391,9 +391,8 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
- BI != BE; ++BI)
- for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
+ for (MachineBasicBlock &MBB : MF)
+ for (MachineBasicBlock::iterator MBBI = MBB.end(); MBBI != MBB.begin();) {
--MBBI;
for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
MachineOperand &MO = MBBI->getOperand(I);
@@ -1172,8 +1171,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
// Describe where callee saved registers were saved, at fixed offsets from
// CFA.
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
- for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
- unsigned Reg = CSI[I].getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
// This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
@@ -1204,15 +1203,15 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
continue;
}
- if (CSI[I].isSpilledToReg()) {
- unsigned SpilledReg = CSI[I].getDstReg();
+ if (I.isSpilledToReg()) {
+ unsigned SpilledReg = I.getDstReg();
unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
nullptr, MRI->getDwarfRegNum(Reg, true),
MRI->getDwarfRegNum(SpilledReg, true)));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIRegister);
} else {
- int64_t Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
+ int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
// We have changed the object offset above but we do not want to change
// the actual offsets in the CFI instruction so we have to undo the
// offset change here.
@@ -2085,15 +2084,15 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
SmallVector<CalleeSavedInfo, 18> FPRegs;
SmallVector<CalleeSavedInfo, 18> VRegs;
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
(Reg != PPC::X2 && Reg != PPC::R2)) &&
"Not expecting to try to spill R2 in a function that must save TOC");
if (PPC::GPRCRegClass.contains(Reg)) {
HasGPSaveArea = true;
- GPRegs.push_back(CSI[i]);
+ GPRegs.push_back(I);
if (Reg < MinGPR) {
MinGPR = Reg;
@@ -2101,7 +2100,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
} else if (PPC::G8RCRegClass.contains(Reg)) {
HasG8SaveArea = true;
- G8Regs.push_back(CSI[i]);
+ G8Regs.push_back(I);
if (Reg < MinG8R) {
MinG8R = Reg;
@@ -2109,7 +2108,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
} else if (PPC::F8RCRegClass.contains(Reg)) {
HasFPSaveArea = true;
- FPRegs.push_back(CSI[i]);
+ FPRegs.push_back(I);
if (Reg < MinFPR) {
MinFPR = Reg;
@@ -2123,7 +2122,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
// alignment requirements, so overload the save area for both cases.
HasVRSaveArea = true;
- VRegs.push_back(CSI[i]);
+ VRegs.push_back(I);
if (Reg < MinVR) {
MinVR = Reg;
@@ -2395,8 +2394,8 @@ bool PPCFrameLowering::spillCalleeSavedRegisters(
}
});
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
// CR2 through CR4 are the nonvolatile CR fields.
bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
@@ -2439,11 +2438,11 @@ bool PPCFrameLowering::spillCalleeSavedRegisters(
MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
.addReg(PPC::R12,
getKillRegState(true)),
- CSI[i].getFrameIdx()));
+ I.getFrameIdx()));
}
} else {
- if (CSI[i].isSpilledToReg()) {
- unsigned Dst = CSI[i].getDstReg();
+ if (I.isSpilledToReg()) {
+ unsigned Dst = I.getDstReg();
if (Spilled[Dst])
continue;
@@ -2478,9 +2477,9 @@ bool PPCFrameLowering::spillCalleeSavedRegisters(
if (Subtarget.needsSwapsForVSXMemOps() &&
!MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
- CSI[i].getFrameIdx(), RC, TRI);
+ I.getFrameIdx(), RC, TRI);
else
- TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(),
+ TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, I.getFrameIdx(),
RC, TRI);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 0abdf81d0908..a2664bcff4ab 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -510,14 +510,12 @@ static bool hasTocDataAttr(SDValue Val, unsigned PointerSize) {
return false;
// TODO: These asserts should be updated as more support for the toc data
- // transformation is added (64 bit, struct support, etc.).
+ // transformation is added (struct support, etc.).
- assert(PointerSize == 4 && "Only 32 Bit Codegen is currently supported by "
- "the toc data transformation.");
-
- assert(PointerSize >= GV->getAlign().valueOrOne().value() &&
- "GlobalVariables with an alignment requirement stricter then 4-bytes "
- "not supported by the toc data transformation.");
+ assert(
+ PointerSize >= GV->getAlign().valueOrOne().value() &&
+ "GlobalVariables with an alignment requirement stricter than TOC entry "
+ "size not supported by the toc data transformation.");
Type *GVType = GV->getValueType();
@@ -537,7 +535,7 @@ static bool hasTocDataAttr(SDValue Val, unsigned PointerSize) {
"supported by the toc data transformation.");
assert(GVType->getPrimitiveSizeInBits() <= PointerSize * 8 &&
- "A GlobalVariable with size larger than 32 bits is not currently "
+ "A GlobalVariable with size larger than a TOC entry is not currently "
"supported by the toc data transformation.");
if (GV->hasLocalLinkage() || GV->hasPrivateLinkage())
@@ -5049,16 +5047,94 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
// value for the comparison. When selecting through a .td file, a type
// error is raised. Must check this first so we never break on the
// !Subtarget->isISA3_1() check.
- if (N->getConstantOperandVal(0) == Intrinsic::ppc_fsels) {
+ auto IntID = N->getConstantOperandVal(0);
+ if (IntID == Intrinsic::ppc_fsels) {
SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3)};
CurDAG->SelectNodeTo(N, PPC::FSELS, MVT::f32, Ops);
return;
}
+ if (IntID == Intrinsic::ppc_bcdadd_p || IntID == Intrinsic::ppc_bcdsub_p) {
+ auto Pred = N->getConstantOperandVal(1);
+ unsigned Opcode =
+ IntID == Intrinsic::ppc_bcdadd_p ? PPC::BCDADD_rec : PPC::BCDSUB_rec;
+ unsigned SubReg = 0;
+ unsigned ShiftVal = 0;
+ bool Reverse = false;
+ switch (Pred) {
+ case 0:
+ SubReg = PPC::sub_eq;
+ ShiftVal = 1;
+ break;
+ case 1:
+ SubReg = PPC::sub_eq;
+ ShiftVal = 1;
+ Reverse = true;
+ break;
+ case 2:
+ SubReg = PPC::sub_lt;
+ ShiftVal = 3;
+ break;
+ case 3:
+ SubReg = PPC::sub_lt;
+ ShiftVal = 3;
+ Reverse = true;
+ break;
+ case 4:
+ SubReg = PPC::sub_gt;
+ ShiftVal = 2;
+ break;
+ case 5:
+ SubReg = PPC::sub_gt;
+ ShiftVal = 2;
+ Reverse = true;
+ break;
+ case 6:
+ SubReg = PPC::sub_un;
+ break;
+ case 7:
+ SubReg = PPC::sub_un;
+ Reverse = true;
+ break;
+ }
+
+ EVT VTs[] = {MVT::v16i8, MVT::Glue};
+ SDValue Ops[] = {N->getOperand(2), N->getOperand(3),
+ CurDAG->getTargetConstant(0, dl, MVT::i32)};
+ SDValue BCDOp = SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, Ops), 0);
+ SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
+ // On Power10, we can use SETBC[R]. On prior architectures, we have to use
+ // MFOCRF and shift/negate the value.
+ if (Subtarget->isISA3_1()) {
+ SDValue SubRegIdx = CurDAG->getTargetConstant(SubReg, dl, MVT::i32);
+ SDValue CRBit = SDValue(
+ CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
+ CR6Reg, SubRegIdx, BCDOp.getValue(1)),
+ 0);
+ CurDAG->SelectNodeTo(N, Reverse ? PPC::SETBCR : PPC::SETBC, MVT::i32,
+ CRBit);
+ } else {
+ SDValue Move =
+ SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR6Reg,
+ BCDOp.getValue(1)),
+ 0);
+ SDValue Ops[] = {Move, getI32Imm((32 - (4 + ShiftVal)) & 31, dl),
+ getI32Imm(31, dl), getI32Imm(31, dl)};
+ if (!Reverse)
+ CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
+ else {
+ SDValue Shift = SDValue(
+ CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
+ CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Shift, getI32Imm(1, dl));
+ }
+ }
+ return;
+ }
+
if (!Subtarget->isISA3_1())
break;
unsigned Opcode = 0;
- switch (N->getConstantOperandVal(0)) {
+ switch (IntID) {
default:
break;
case Intrinsic::ppc_altivec_vstribr_p:
@@ -5713,41 +5789,57 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
if (isAIXABI && CModel == CodeModel::Medium)
report_fatal_error("Medium code model is not supported on AIX.");
- // For 64-bit small code model, we allow SelectCodeCommon to handle this,
- // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA.
- if (isPPC64 && CModel == CodeModel::Small)
+ // For 64-bit ELF small code model, we allow SelectCodeCommon to handle
+ // this, selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. For AIX
+ // small code model, we need to check for a toc-data attribute.
+ if (isPPC64 && !isAIXABI && CModel == CodeModel::Small)
break;
+ auto replaceWith = [this, &dl](unsigned OpCode, SDNode *TocEntry,
+ EVT OperandTy) {
+ SDValue GA = TocEntry->getOperand(0);
+ SDValue TocBase = TocEntry->getOperand(1);
+ SDNode *MN = CurDAG->getMachineNode(OpCode, dl, OperandTy, GA, TocBase);
+ transferMemOperands(TocEntry, MN);
+ ReplaceNode(TocEntry, MN);
+ };
+
// Handle 32-bit small code model.
- if (!isPPC64) {
+ if (!isPPC64 && CModel == CodeModel::Small) {
// Transforms the ISD::TOC_ENTRY node to passed in Opcode, either
// PPC::ADDItoc, or PPC::LWZtoc
- auto replaceWith = [this, &dl](unsigned OpCode, SDNode *TocEntry) {
- SDValue GA = TocEntry->getOperand(0);
- SDValue TocBase = TocEntry->getOperand(1);
- SDNode *MN = CurDAG->getMachineNode(OpCode, dl, MVT::i32, GA, TocBase);
- transferMemOperands(TocEntry, MN);
- ReplaceNode(TocEntry, MN);
- };
-
if (isELFABI) {
assert(TM.isPositionIndependent() &&
"32-bit ELF can only have TOC entries in position independent"
" code.");
// 32-bit ELF always uses a small code model toc access.
- replaceWith(PPC::LWZtoc, N);
+ replaceWith(PPC::LWZtoc, N, MVT::i32);
return;
}
- if (isAIXABI && CModel == CodeModel::Small) {
- if (hasTocDataAttr(N->getOperand(0),
- CurDAG->getDataLayout().getPointerSize()))
- replaceWith(PPC::ADDItoc, N);
- else
- replaceWith(PPC::LWZtoc, N);
+ assert(isAIXABI && "ELF ABI already handled");
+ if (hasTocDataAttr(N->getOperand(0),
+ CurDAG->getDataLayout().getPointerSize())) {
+ replaceWith(PPC::ADDItoc, N, MVT::i32);
return;
}
+
+ replaceWith(PPC::LWZtoc, N, MVT::i32);
+ return;
+ }
+
+ if (isPPC64 && CModel == CodeModel::Small) {
+ assert(isAIXABI && "ELF ABI handled in common SelectCode");
+
+ if (hasTocDataAttr(N->getOperand(0),
+ CurDAG->getDataLayout().getPointerSize())) {
+ replaceWith(PPC::ADDItoc8, N, MVT::i64);
+ return;
+ }
+ // Break if it doesn't have a toc data attribute. Proceed with common
+ // SelectCode.
+ break;
}
assert(CModel != CodeModel::Small && "All small code models handled.");
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index ac952b240a48..ec7e30d7e362 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12116,6 +12116,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineFunction::iterator It = ++BB->getIterator();
MachineFunction *F = BB->getParent();
+ MachineRegisterInfo &MRI = F->getRegInfo();
if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
@@ -12721,7 +12722,10 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
Register OldFPSCRReg = MI.getOperand(0).getReg();
// Save FPSCR value.
- BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
+ if (MRI.use_empty(OldFPSCRReg))
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
+ else
+ BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
// The floating point rounding mode is in the bits 62:63 of FPCSR, and has
// the following settings:
@@ -12854,7 +12858,10 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// Result of setflm is previous FPSCR content, so we need to save it first.
Register OldFPSCRReg = MI.getOperand(0).getReg();
- BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
+ if (MRI.use_empty(OldFPSCRReg))
+ BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
+ else
+ BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
// Put bits in 32:63 to FPSCR.
Register NewFPSCRReg = MI.getOperand(1).getReg();
@@ -15966,8 +15973,11 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
}
break;
case 'v':
- if (Subtarget.hasAltivec())
+ if (Subtarget.hasAltivec() && VT.isVector())
return std::make_pair(0U, &PPC::VRRCRegClass);
+ else if (Subtarget.hasVSX())
+ // Scalars in Altivec registers only make sense with VSX.
+ return std::make_pair(0U, &PPC::VFRCRegClass);
break;
case 'y': // crrc
return std::make_pair(0U, &PPC::CRRCRegClass);
@@ -17664,6 +17674,24 @@ PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
return Mode;
}
+bool PPCTargetLowering::splitValueIntoRegisterParts(
+ SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
+ unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
+ EVT ValVT = Val.getValueType();
+ // If we are splitting a scalar integer into f64 parts (i.e. so they
+ // can be placed into VFRC registers), we need to zero extend and
+ // bitcast the values. This will ensure the value is placed into a
+ // VSR using direct moves or stack operations as needed.
+ if (PartVT == MVT::f64 &&
+ (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
+ Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
+ Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
+ Parts[0] = Val;
+ return true;
+ }
+ return false;
+}
+
// If we happen to match to an aligned D-Form, check if the Frame Index is
// adequately aligned. If it is not, reset the mode to match to X-Form.
static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 34dce2c3172d..87b7f96112ec 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1139,6 +1139,10 @@ namespace llvm {
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base,
SelectionDAG &DAG) const;
+ bool
+ splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
+ SDValue *Parts, unsigned NumParts, MVT PartVT,
+ Optional<CallingConv::ID> CC) const override;
/// Structure that collects some common arguments that get passed around
/// between the functions for call lowering.
struct CallFlags {
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 417a6ce7e522..58af8037f59c 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -773,6 +773,11 @@ def ADDIS8 : DForm_2<15, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s17imm64:$imm),
"addis $rD, $rA, $imm", IIC_IntSimple,
[(set i64:$rD, (add i64:$rA, imm16ShiftedSExt:$imm))]>;
+def LA8 : DForm_2<14, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s16imm64:$sym),
+ "la $rD, $sym($rA)", IIC_IntGeneral,
+ [(set i64:$rD, (add i64:$rA,
+ (PPClo tglobaladdr:$sym, 0)))]>;
+
let Defs = [CARRY] in {
def SUBFIC8: DForm_2< 8, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm),
"subfic $rD, $rA, $imm", IIC_IntGeneral,
@@ -1435,6 +1440,13 @@ def ADDIStocHA8: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentr
def ADDItocL: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
"#ADDItocL", []>, isPPC64;
}
+
+// Local Data Transform
+def ADDItoc8 : PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg),
+ "#ADDItoc8",
+ [(set i64:$rD,
+ (PPCtoc_entry tglobaladdr:$disp, i64:$reg))]>, isPPC64;
+
let mayLoad = 1 in
def LDtocL: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg),
"#LDtocL", []>, isPPC64;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index 1e0e2d88e54b..fe21a164dfab 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1161,6 +1161,22 @@ def : Pat<(v16i8 (srl (sub v16i8:$vA, (v16i8 (bitconvert(vnot v4i32:$vB)))),
} // end HasAltivec
+// [PO VRT VRA VRB 1 PS XO], "_o" means CR6 is set.
+class VX_VT5_VA5_VB5_PS1_XO9_o<bits<9> xo, string opc, list<dag> pattern>
+ : VX_RD5_RSp5_PS1_XO9<xo,
+ (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, u1imm:$PS),
+ !strconcat(opc, " $vD, $vA, $vB, $PS"), IIC_VecFP, pattern> {
+ let Defs = [CR6];
+}
+
+// [PO VRT VRA VRB 1 / XO]
+class VX_VT5_VA5_VB5_XO9_o<bits<9> xo, string opc, list<dag> pattern>
+ : VX_RD5_RSp5_PS1_XO9<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP, pattern> {
+ let Defs = [CR6];
+ let PS = 0;
+}
+
def HasP8Altivec : Predicate<"Subtarget->hasP8Altivec()">;
def HasP8Crypto : Predicate<"Subtarget->hasP8Crypto()">;
let Predicates = [HasP8Altivec] in {
@@ -1351,6 +1367,13 @@ def VUPKHSW : VX2_Int_Ty2<1614, "vupkhsw", int_ppc_altivec_vupkhsw,
v2i64, v4i32>;
def VUPKLSW : VX2_Int_Ty2<1742, "vupklsw", int_ppc_altivec_vupklsw,
v2i64, v4i32>;
+def BCDADD_rec : VX_VT5_VA5_VB5_PS1_XO9_o<1, "bcdadd." , []>;
+def BCDSUB_rec : VX_VT5_VA5_VB5_PS1_XO9_o<65, "bcdsub." , []>;
+
+def : Pat<(v16i8 (int_ppc_bcdadd v16i8:$vA, v16i8:$vB, timm:$PS)),
+ (BCDADD_rec $vA, $vB, $PS)>;
+def : Pat<(v16i8 (int_ppc_bcdsub v16i8:$vA, v16i8:$vB, timm:$PS)),
+ (BCDSUB_rec $vA, $vB, $PS)>;
// Shuffle patterns for unary and swapped (LE) vector pack modulo.
def:Pat<(vpkudum_unary_shuffle v16i8:$vA, undef),
@@ -1598,22 +1621,6 @@ def BCDCPSGN_rec : VX1_VT5_VA5_VB5<833, "bcdcpsgn.", []>;
def BCDSETSGN_rec : VX_VT5_EO5_VB5_PS1_XO9_o<31, 385, "bcdsetsgn.", []>;
-// [PO VRT VRA VRB 1 PS XO], "_o" means CR6 is set.
-class VX_VT5_VA5_VB5_PS1_XO9_o<bits<9> xo, string opc, list<dag> pattern>
- : VX_RD5_RSp5_PS1_XO9<xo,
- (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, u1imm:$PS),
- !strconcat(opc, " $vD, $vA, $vB, $PS"), IIC_VecFP, pattern> {
- let Defs = [CR6];
-}
-
-// [PO VRT VRA VRB 1 / XO]
-class VX_VT5_VA5_VB5_XO9_o<bits<9> xo, string opc, list<dag> pattern>
- : VX_RD5_RSp5_PS1_XO9<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP, pattern> {
- let Defs = [CR6];
- let PS = 0;
-}
-
// Decimal Shift/Unsigned-Shift/Shift-and-Round
def BCDS_rec : VX_VT5_VA5_VB5_PS1_XO9_o<193, "bcds." , []>;
def BCDUS_rec : VX_VT5_VA5_VB5_XO9_o <129, "bcdus.", []>;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 649a150866b4..a0fd2111de11 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2138,9 +2138,8 @@ bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
}
static bool MBBDefinesCTR(MachineBasicBlock &MBB) {
- for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
- I != IE; ++I)
- if (I->definesRegister(PPC::CTR) || I->definesRegister(PPC::CTR8))
+ for (MachineInstr &MI : MBB)
+ if (MI.definesRegister(PPC::CTR) || MI.definesRegister(PPC::CTR8))
return true;
return false;
}
@@ -2331,8 +2330,7 @@ bool PPCInstrInfo::ClobbersPredicate(MachineInstr &MI,
&PPC::CTRRCRegClass, &PPC::CTRRC8RegClass };
bool Found = false;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
for (unsigned c = 0; c < array_lengthof(RCs) && !Found; ++c) {
const TargetRegisterClass *RC = RCs[c];
if (MO.isReg()) {
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index d2d5ca92ca1c..d92a10c5b208 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2471,6 +2471,7 @@ def DblwdCmp {
// [HasVSX, HasP8Vector, IsLittleEndian]
// [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian, IsPPC64]
// [HasVSX, HasP8Vector, NoP9Vector, IsLittleEndian]
+// [HasVSX, HasP8Altivec]
// [HasVSX, HasDirectMove]
// [HasVSX, HasDirectMove, IsBigEndian]
// [HasVSX, HasDirectMove, IsLittleEndian]
@@ -2500,6 +2501,10 @@ let Predicates = [HasVSX, IsBigEndian, HasP8Altivec] in
def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor v16i8:$a,
v16i8:$b, v16i8:$c)),
(v16i8 (VPERMXOR $a, $b, $c))>;
+let Predicates = [HasVSX, HasP8Altivec] in
+ def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor_be v16i8:$a,
+ v16i8:$b, v16i8:$c)),
+ (v16i8 (VPERMXOR $a, $b, $c))>;
let AddedComplexity = 400 in {
// Valid for any VSX subtarget, regardless of endianness.
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
index 5cc180d770b2..22c5b6c11289 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -152,9 +152,9 @@ void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP) {
OutMI.setOpcode(MI->getOpcode());
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ for (const MachineOperand &MO : MI->operands()) {
MCOperand MCOp;
- if (LowerPPCMachineOperandToMCOperand(MI->getOperand(i), MCOp, AP))
+ if (LowerPPCMachineOperandToMCOperand(MO, MCOp, AP))
OutMI.addOperand(MCOp);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
index bdff5109c1e1..9d5206f8fd43 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
@@ -149,6 +149,79 @@ static bool checkOpConstraints(FusionFeature::FusionKind Kd,
case FusionFeature::FK_SldiAdd:
return (matchingImmOps(FirstMI, 2, 3) && matchingImmOps(FirstMI, 3, 60)) ||
(matchingImmOps(FirstMI, 2, 6) && matchingImmOps(FirstMI, 3, 57));
+
+ // rldicl rx, ra, 1, 0 - xor
+ case FusionFeature::FK_RotateLeftXor:
+ return matchingImmOps(FirstMI, 2, 1) && matchingImmOps(FirstMI, 3, 0);
+
+ // rldicr rx, ra, 1, 63 - xor
+ case FusionFeature::FK_RotateRightXor:
+ return matchingImmOps(FirstMI, 2, 1) && matchingImmOps(FirstMI, 3, 63);
+
+ // We actually use CMPW* and CMPD*; the 'l' bit does not exist as a separate operand in the instruction.
+
+ // { lbz,lbzx,lhz,lhzx,lwz,lwzx } - cmpi 0,1,rx,{ 0,1,-1 }
+ // { lbz,lbzx,lhz,lhzx,lwz,lwzx } - cmpli 0,L,rx,{ 0,1 }
+ case FusionFeature::FK_LoadCmp1:
+ // { ld,ldx } - cmpi 0,1,rx,{ 0,1,-1 }
+ // { ld,ldx } - cmpli 0,1,rx,{ 0,1 }
+ case FusionFeature::FK_LoadCmp2: {
+ const MachineOperand &BT = SecondMI.getOperand(0);
+ if (!BT.isReg() ||
+ (!Register::isVirtualRegister(BT.getReg()) && BT.getReg() != PPC::CR0))
+ return false;
+ if (SecondMI.getOpcode() == PPC::CMPDI &&
+ matchingImmOps(SecondMI, 2, -1, 16))
+ return true;
+ return matchingImmOps(SecondMI, 2, 0) || matchingImmOps(SecondMI, 2, 1);
+ }
+
+ // { lha,lhax,lwa,lwax } - cmpi 0,L,rx,{ 0,1,-1 }
+ case FusionFeature::FK_LoadCmp3: {
+ const MachineOperand &BT = SecondMI.getOperand(0);
+ if (!BT.isReg() ||
+ (!Register::isVirtualRegister(BT.getReg()) && BT.getReg() != PPC::CR0))
+ return false;
+ return matchingImmOps(SecondMI, 2, 0) || matchingImmOps(SecondMI, 2, 1) ||
+ matchingImmOps(SecondMI, 2, -1, 16);
+ }
+
+ // mtctr - { bcctr,bcctrl }
+ case FusionFeature::FK_ZeroMoveCTR:
+ // ( mtctr rx ) is an alias of ( mtspr 9, rx )
+ return (FirstMI.getOpcode() != PPC::MTSPR &&
+ FirstMI.getOpcode() != PPC::MTSPR8) ||
+ matchingImmOps(FirstMI, 0, 9);
+
+ // mtlr - { bclr,bclrl }
+ case FusionFeature::FK_ZeroMoveLR:
+ // ( mtlr rx ) is an alias of ( mtspr 8, rx )
+ return (FirstMI.getOpcode() != PPC::MTSPR &&
+ FirstMI.getOpcode() != PPC::MTSPR8) ||
+ matchingImmOps(FirstMI, 0, 8);
+
+ // addis rx,ra,si - addi rt,rx,SI, SI >= 0
+ case FusionFeature::FK_AddisAddi: {
+ const MachineOperand &RA = FirstMI.getOperand(1);
+ const MachineOperand &SI = SecondMI.getOperand(2);
+ if (!SI.isImm() || !RA.isReg())
+ return false;
+ if (RA.getReg() == PPC::ZERO || RA.getReg() == PPC::ZERO8)
+ return false;
+ return SignExtend64(SI.getImm(), 16) >= 0;
+ }
+
+ // addi rx,ra,si - addis rt,rx,SI, ra > 0, SI >= 2
+ case FusionFeature::FK_AddiAddis: {
+ const MachineOperand &RA = FirstMI.getOperand(1);
+ const MachineOperand &SI = FirstMI.getOperand(2);
+ if (!SI.isImm() || !RA.isReg())
+ return false;
+ if (RA.getReg() == PPC::ZERO || RA.getReg() == PPC::ZERO8)
+ return false;
+ int64_t ExtendedSI = SignExtend64(SI.getImm(), 16);
+ return ExtendedSI >= 2;
+ }
}
llvm_unreachable("All the cases should have been handled");
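
The addis/addi fusion constraints above hinge on interpreting the 16-bit immediate as a signed value. A standalone sketch of that check follows; signExtend64 is a local stand-in assumed to match llvm::SignExtend64(Imm, 16).

    #include <cassert>
    #include <cstdint>

    // Sign-extend the low B bits of X to 64 bits: shift the field to the top,
    // then arithmetic-shift it back down.
    static int64_t signExtend64(uint64_t X, unsigned B) {
      return int64_t(X << (64 - B)) >> (64 - B);
    }

    int main() {
      assert(signExtend64(0x7fff, 16) >= 0); // +32767: FK_AddisAddi accepts
      assert(signExtend64(0x8000, 16) < 0);  // -32768: FK_AddisAddi rejects
      assert(signExtend64(0x0002, 16) >= 2); // FK_AddiAddis needs SI >= 2
      return 0;
    }
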
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.def b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.def
index 469a24800423..e4954b722fd0 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.def
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.def
@@ -78,5 +78,80 @@ FUSION_FEATURE(VecLogical, hasLogicalFusion, -1,
FUSION_FEATURE(SldiAdd, hasArithAddFusion, -1, FUSION_OP_SET(RLDICR, RLDICR_32),
FUSION_OP_SET(ADD4, ADD8, SUBF, SUBF8))
+// rldicl rx, ra, 1, 0 - xor
+FUSION_FEATURE(RotateLeftXor, hasSha3Fusion, 1,
+ FUSION_OP_SET(RLDICL, RLDICL_32, RLDICL_32_64),
+ FUSION_OP_SET(XOR, XOR8))
+
+// rldicr rx, ra, 1, 63 - xor
+FUSION_FEATURE(RotateRightXor, hasSha3Fusion, 1,
+ FUSION_OP_SET(RLDICR, RLDICR_32), FUSION_OP_SET(XOR, XOR8))
+
+// There are two special cases in the 'load-compare' series, so we have to split
+// them into several pattern groups to fit into the current framework. This can
+// be made clearer once we switch to a more expressive approach.
+
+// { lbz,lbzx,lhz,lhzx,lwz,lwzx } - cmpi 0,1,rx,{ 0,1,-1 }
+// { lbz,lbzx,lhz,lhzx,lwz,lwzx } - cmpli 0,L,rx,{ 0,1 }
+FUSION_FEATURE(LoadCmp1, hasCompareFusion, 1,
+ FUSION_OP_SET(LBZ, LBZ8, LBZX, LBZX8, LBZXTLS, LBZXTLS_,
+ LBZXTLS_32, LHZ, LHZ8, LHZX, LHZX8, LHZXTLS,
+ LHZXTLS_, LHZXTLS_32, LWZ, LWZ8, LWZX, LWZX8,
+ LWZXTLS, LWZXTLS_, LWZXTLS_32),
+ FUSION_OP_SET(CMPDI, CMPLDI, CMPLWI))
+
+// { ld,ldx } - cmpi 0,1,rx,{ 0,1,-1 }
+// { ld,ldx } - cmpli 0,1,rx,{ 0,1 }
+FUSION_FEATURE(LoadCmp2, hasCompareFusion, 1,
+ FUSION_OP_SET(LD, LDX, LDXTLS, LDXTLS_),
+ FUSION_OP_SET(CMPDI, CMPLDI))
+
+// { lha,lhax,lwa,lwax } - cmpi 0,L,rx,{ 0,1,-1 }
+FUSION_FEATURE(LoadCmp3, hasCompareFusion, 1,
+ FUSION_OP_SET(LHA, LHA8, LHAX, LHAX8, LWA, LWA_32, LWAX,
+ LWAX_32),
+ FUSION_OP_SET(CMPLDI, CMPLWI))
+
+// ori - oris
+FUSION_FEATURE(OriOris, hasWideImmFusion, 1, FUSION_OP_SET(ORI, ORI8),
+ FUSION_OP_SET(ORIS, ORIS8))
+
+// lis - ori
+FUSION_FEATURE(LisOri, hasWideImmFusion, 1, FUSION_OP_SET(LIS, LIS8),
+ FUSION_OP_SET(ORI, ORI8))
+
+// oris - ori
+FUSION_FEATURE(OrisOri, hasWideImmFusion, 1, FUSION_OP_SET(ORIS, ORIS8),
+ FUSION_OP_SET(ORI, ORI8))
+
+// xori - xoris
+FUSION_FEATURE(XoriXoris, hasWideImmFusion, 1, FUSION_OP_SET(XORI, XORI8),
+ FUSION_OP_SET(XORIS, XORIS8))
+
+// xoris - xori
+FUSION_FEATURE(XorisXori, hasWideImmFusion, 1, FUSION_OP_SET(XORIS, XORIS8),
+ FUSION_OP_SET(XORI, XORI8))
+
+// addis rx,ra,si - addi rt,rx,SI, SI >= 0
+FUSION_FEATURE(AddisAddi, hasWideImmFusion, 1,
+ FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8),
+ FUSION_OP_SET(ADDI, ADDI8, ADDItocL))
+
+// addi rx,ra,si - addis rt,rx,SI, ra > 0, SI >= 2
+FUSION_FEATURE(AddiAddis, hasWideImmFusion, 1,
+ FUSION_OP_SET(ADDI, ADDI8, ADDItocL),
+ FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8))
+
+// mtctr - { bcctr,bcctrl }
+FUSION_FEATURE(ZeroMoveCTR, hasZeroMoveFusion, -1,
+ FUSION_OP_SET(MTCTR, MTCTRloop, MTSPR8, MTSPR),
+ FUSION_OP_SET(BCCTR, BCCTRn, BCCTR8, BCCTR8n, BCCTRL, BCCTRLn,
+ BCCTRL8, BCCTRL8n, gBCCTR, gBCCTRL))
+
+// mtlr - { bclr,bclrl }
+FUSION_FEATURE(ZeroMoveLR, hasZeroMoveFusion, -1,
+ FUSION_OP_SET(MTLR8, MTLR, MTSPR8, MTSPR),
+ FUSION_OP_SET(BCLR, BCLRn, gBCLR, BCLRL, BCLRLn, gBCLRL))
+
#undef FUSION_FEATURE
#undef FUSION_OP_SET
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index dfc29dbb10f1..1258a1281597 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -131,6 +131,10 @@ void PPCSubtarget::initializeEnvironment() {
HasAddLogicalFusion = false;
HasLogicalAddFusion = false;
HasLogicalFusion = false;
+ HasSha3Fusion = false;
+ HasCompareFusion = false;
+ HasWideImmFusion = false;
+ HasZeroMoveFusion = false;
IsISA2_06 = false;
IsISA2_07 = false;
IsISA3_0 = false;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.h
index 783ea121ccb8..d52833cb1465 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -151,6 +151,10 @@ protected:
bool HasAddLogicalFusion;
bool HasLogicalAddFusion;
bool HasLogicalFusion;
+ bool HasSha3Fusion;
+ bool HasCompareFusion;
+ bool HasWideImmFusion;
+ bool HasZeroMoveFusion;
bool IsISA2_06;
bool IsISA2_07;
bool IsISA3_0;
@@ -340,6 +344,10 @@ public:
bool hasAddLogicalFusion() const { return HasAddLogicalFusion; }
bool hasLogicalAddFusion() const { return HasLogicalAddFusion; }
bool hasLogicalFusion() const { return HasLogicalFusion; }
+ bool hasCompareFusion() const { return HasCompareFusion; }
+ bool hasWideImmFusion() const { return HasWideImmFusion; }
+ bool hasSha3Fusion() const { return HasSha3Fusion; }
+ bool hasZeroMoveFusion() const { return HasZeroMoveFusion; }
bool needsSwapsForVSXMemOps() const {
return hasVSX() && isLittleEndian() && !hasP9Vector();
}
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 77d5a2668b60..5d6f58a77a39 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -318,9 +318,20 @@ InstructionCost PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind);
}
+// Check if the current Type is an MMA vector type. The valid MMA types are
+// v256i1 and v512i1.
+static bool isMMAType(Type *Ty) {
+ return Ty->isVectorTy() && (Ty->getScalarSizeInBits() == 1) &&
+ (Ty->getPrimitiveSizeInBits() > 128);
+}
+
InstructionCost PPCTTIImpl::getUserCost(const User *U,
ArrayRef<const Value *> Operands,
TTI::TargetCostKind CostKind) {
+ // Set the max cost if an MMA type is present (v256i1, v512i1).
+ if (isMMAType(U->getType()))
+ return InstructionCost::getMax();
+
// We already implement getCastInstrCost and getMemoryOpCost where we perform
// the vector adjustment there.
if (isa<CastInst>(U) || isa<LoadInst>(U) || isa<StoreInst>(U))
@@ -942,32 +953,39 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
return 2;
}
-// Adjust the cost of vector instructions on targets which there is overlap
-// between the vector and scalar units, thereby reducing the overall throughput
-// of vector code wrt. scalar code.
-InstructionCost PPCTTIImpl::vectorCostAdjustment(InstructionCost Cost,
- unsigned Opcode, Type *Ty1,
- Type *Ty2) {
+// Returns a factor used to adjust the cost of vector instructions on targets
+// where there is overlap between the vector and scalar units, thereby reducing
+// the overall throughput of vector code relative to scalar code.
+// An invalid instruction cost is returned if the type is an MMA vector type.
+InstructionCost PPCTTIImpl::vectorCostAdjustmentFactor(unsigned Opcode,
+ Type *Ty1, Type *Ty2) {
+ // If the vector type is of an MMA type (v256i1, v512i1), an invalid
+ // instruction cost is returned. This signals other cost-computing functions
+ // to return the maximum instruction cost, preventing the optimizer from
+ // producing MMA types within the IR.
+ if (isMMAType(Ty1))
+ return InstructionCost::getInvalid();
+
if (!ST->vectorsUseTwoUnits() || !Ty1->isVectorTy())
- return Cost;
+ return InstructionCost(1);
std::pair<InstructionCost, MVT> LT1 = TLI->getTypeLegalizationCost(DL, Ty1);
// If type legalization involves splitting the vector, we don't want to
// double the cost at every step - only the last step.
if (LT1.first != 1 || !LT1.second.isVector())
- return Cost;
+ return InstructionCost(1);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
if (TLI->isOperationExpand(ISD, LT1.second))
- return Cost;
+ return InstructionCost(1);
if (Ty2) {
std::pair<InstructionCost, MVT> LT2 = TLI->getTypeLegalizationCost(DL, Ty2);
if (LT2.first != 1 || !LT2.second.isVector())
- return Cost;
+ return InstructionCost(1);
}
- return Cost * 2;
+ return InstructionCost(2);
}
InstructionCost PPCTTIImpl::getArithmeticInstrCost(
@@ -977,6 +995,11 @@ InstructionCost PPCTTIImpl::getArithmeticInstrCost(
TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
const Instruction *CxtI) {
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
+
+ InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Ty, nullptr);
+ if (!CostFactor.isValid())
+ return InstructionCost::getMax();
+
// TODO: Handle more cost kinds.
if (CostKind != TTI::TCK_RecipThroughput)
return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
@@ -986,12 +1009,18 @@ InstructionCost PPCTTIImpl::getArithmeticInstrCost(
// Fallback to the default implementation.
InstructionCost Cost = BaseT::getArithmeticInstrCost(
Opcode, Ty, CostKind, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo);
- return vectorCostAdjustment(Cost, Opcode, Ty, nullptr);
+ return Cost * CostFactor;
}
InstructionCost PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
ArrayRef<int> Mask, int Index,
Type *SubTp) {
+
+ InstructionCost CostFactor =
+ vectorCostAdjustmentFactor(Instruction::ShuffleVector, Tp, nullptr);
+ if (!CostFactor.isValid())
+ return InstructionCost::getMax();
+
// Legalize the type.
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
@@ -1000,8 +1029,7 @@ InstructionCost PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
// instruction). We need one such shuffle instruction for each actual
// register (this is not true for arbitrary shuffles, but is true for the
// structured types of shuffles covered by TTI::ShuffleKind).
- return vectorCostAdjustment(LT.first, Instruction::ShuffleVector, Tp,
- nullptr);
+ return LT.first * CostFactor;
}
InstructionCost PPCTTIImpl::getCFInstrCost(unsigned Opcode,
@@ -1020,9 +1048,13 @@ InstructionCost PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
const Instruction *I) {
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
+ InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Dst, Src);
+ if (!CostFactor.isValid())
+ return InstructionCost::getMax();
+
InstructionCost Cost =
BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
- Cost = vectorCostAdjustment(Cost, Opcode, Dst, Src);
+ Cost *= CostFactor;
// TODO: Allow non-throughput costs that aren't binary.
if (CostKind != TTI::TCK_RecipThroughput)
return Cost == 0 ? 0 : 1;
@@ -1034,12 +1066,17 @@ InstructionCost PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I) {
+ InstructionCost CostFactor =
+ vectorCostAdjustmentFactor(Opcode, ValTy, nullptr);
+ if (!CostFactor.isValid())
+ return InstructionCost::getMax();
+
InstructionCost Cost =
BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
// TODO: Handle other cost kinds.
if (CostKind != TTI::TCK_RecipThroughput)
return Cost;
- return vectorCostAdjustment(Cost, Opcode, ValTy, nullptr);
+ return Cost * CostFactor;
}
InstructionCost PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
@@ -1049,8 +1086,12 @@ InstructionCost PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
+ InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Val, nullptr);
+ if (!CostFactor.isValid())
+ return InstructionCost::getMax();
+
InstructionCost Cost = BaseT::getVectorInstrCost(Opcode, Val, Index);
- Cost = vectorCostAdjustment(Cost, Opcode, Val, nullptr);
+ Cost *= CostFactor;
if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
// Double-precision scalars are already located in index #0 (or #1 if LE).
@@ -1065,7 +1106,7 @@ InstructionCost PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
if (ISD == ISD::INSERT_VECTOR_ELT)
// A move-to VSR and a permute/insert. Assume vector operation cost
// for both (cost will be 2x on P9).
- return vectorCostAdjustment(2, Opcode, Val, nullptr);
+ return 2 * CostFactor;
// It's an extract. Maybe we can do a cheap move-from VSR.
unsigned EltSize = Val->getScalarSizeInBits();
@@ -1082,7 +1123,7 @@ InstructionCost PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
// We need a vector extract (or mfvsrld). Assume vector operation cost.
// The cost of the load constant for a vector extract is disregarded
// (invariant, easily schedulable).
- return vectorCostAdjustment(1, Opcode, Val, nullptr);
+ return CostFactor;
} else if (ST->hasDirectMove())
// Assume permute has standard cost.
@@ -1114,6 +1155,11 @@ InstructionCost PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I) {
+
+ InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Src, nullptr);
+ if (!CostFactor.isValid())
+ return InstructionCost::getMax();
+
if (TLI->getValueType(DL, Src, true) == MVT::Other)
return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
CostKind);
@@ -1128,7 +1174,7 @@ InstructionCost PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
if (CostKind != TTI::TCK_RecipThroughput)
return Cost;
- Cost = vectorCostAdjustment(Cost, Opcode, Src, nullptr);
+ Cost *= CostFactor;
bool IsAltivecType = ST->hasAltivec() &&
(LT.second == MVT::v16i8 || LT.second == MVT::v8i16 ||
@@ -1194,6 +1240,11 @@ InstructionCost PPCTTIImpl::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond, bool UseMaskForGaps) {
+ InstructionCost CostFactor =
+ vectorCostAdjustmentFactor(Opcode, VecTy, nullptr);
+ if (!CostFactor.isValid())
+ return InstructionCost::getMax();
+
if (UseMaskForCond || UseMaskForGaps)
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
Alignment, AddressSpace, CostKind,
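
To summarize the cost-model change in this file: vectorCostAdjustment used to scale a cost in place, while the new vectorCostAdjustmentFactor returns a multiplier (1 or 2) or an invalid cost for MMA types, and every cost query either multiplies its base cost by that factor or bails out with the maximum cost. A simplified standalone sketch follows; the std::optional-based CostFactor and the boolean parameters are stand-ins, not the LLVM InstructionCost API.

    #include <cstdio>
    #include <limits>
    #include <optional>

    using CostFactor = std::optional<unsigned>; // nullopt ~ InstructionCost::getInvalid()

    static CostFactor vectorCostAdjustmentFactor(bool IsMMAType, bool TwoUnitOverlap) {
      if (IsMMAType)
        return std::nullopt;           // callers must return the maximum cost
      return TwoUnitOverlap ? 2u : 1u; // vector/scalar unit overlap doubles cost
    }

    static unsigned getArithmeticInstrCost(unsigned BaseCost, bool IsMMAType,
                                           bool TwoUnitOverlap) {
      CostFactor Factor = vectorCostAdjustmentFactor(IsMMAType, TwoUnitOverlap);
      if (!Factor)
        return std::numeric_limits<unsigned>::max(); // ~ InstructionCost::getMax()
      return BaseCost * *Factor;
    }

    int main() {
      std::printf("vector add, two-unit target: %u\n",
                  getArithmeticInstrCost(1, false, true));
      std::printf("MMA-typed value:             %u\n",
                  getArithmeticInstrCost(1, true, true));
      return 0;
    }
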
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index aa84013803af..7aeb0c59d503 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -100,8 +100,8 @@ public:
unsigned getCacheLineSize() const override;
unsigned getPrefetchDistance() const override;
unsigned getMaxInterleaveFactor(unsigned VF);
- InstructionCost vectorCostAdjustment(InstructionCost Cost, unsigned Opcode,
- Type *Ty1, Type *Ty2);
+ InstructionCost vectorCostAdjustmentFactor(unsigned Opcode, Type *Ty1,
+ Type *Ty2);
InstructionCost getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
index d1979b5456ce..f1c3810f4ee5 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
@@ -170,6 +170,14 @@ void RISCVInstPrinter::printAtomicMemOp(const MCInst *MI, unsigned OpNo,
void RISCVInstPrinter::printVTypeI(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNo).getImm();
+ // Print the raw immediate for reserved values: vlmul[2:0]=4, vsew[2:0]=0b1xx,
+ // or non-zero bits 8/9/10.
+ if (RISCVVType::getVLMUL(Imm) == RISCVII::VLMUL::LMUL_RESERVED ||
+ RISCVVType::getSEW(Imm) > 64 || (Imm & 0x700) != 0) {
+ O << Imm;
+ return;
+ }
+ // Print the text form.
RISCVVType::printVType(Imm, O);
}
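
The printer change above falls back to printing the raw immediate whenever the vtype encoding is reserved. A standalone sketch of that test is below, assuming the standard vtype layout with vlmul in bits [2:0] and vsew in bits [5:3]; isReservedVType is an illustrative helper, not the RISCVVType API.

    #include <cstdio>

    static bool isReservedVType(unsigned Imm) {
      unsigned VLMul = Imm & 0x7;       // vlmul[2:0]
      unsigned VSEW = (Imm >> 3) & 0x7; // vsew[2:0]
      return VLMul == 4 ||              // reserved LMUL encoding
             (VSEW & 0x4) != 0 ||       // vsew = 0b1xx would mean SEW > 64
             (Imm & 0x700) != 0;        // bits 8/9/10 must be zero
    }

    int main() {
      std::printf("vtype 0x04 reserved: %d\n", isReservedVType(0x04)); // vlmul=4
      std::printf("vtype 0x08 reserved: %d\n", isReservedVType(0x08)); // e16, m1
      return 0;
    }
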
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 595c3cdfbb1d..f5d491938050 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -237,7 +237,13 @@ bool RISCVFrameLowering::hasBP(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
- return MFI.hasVarSizedObjects() && TRI->hasStackRealignment(MF);
+ // If we do not reserve stack space for outgoing arguments in the prologue,
+ // we adjust the stack pointer before each call instruction. After the
+ // adjustment, we cannot use SP to access the stack objects for the
+ // arguments. Instead, use BP to access these stack objects.
+ return (MFI.hasVarSizedObjects() ||
+ (!hasReservedCallFrame(MF) && MFI.getMaxCallFrameSize() != 0)) &&
+ TRI->hasStackRealignment(MF);
}
// Determines the size of the frame and maximum call frame size.
@@ -1065,10 +1071,14 @@ bool RISCVFrameLowering::restoreCalleeSavedRegisters(
if (MI != MBB.end() && !MI->isDebugInstr())
DL = MI->getDebugLoc();
- // Manually restore values not restored by libcall. Insert in reverse order.
+ // Manually restore values not restored by libcall.
+ // Keep the same order as in the prologue. There is no need to reverse the
+ // order in the epilogue. In addition, the return address will be restored
+ // first in the epilogue. This increases the opportunity to avoid the
+ // load-to-use data hazard between loading RA and returning through RA.
// loadRegFromStackSlot can insert multiple instructions.
const auto &NonLibcallCSI = getNonLibcallCSI(*MF, CSI);
- for (auto &CS : reverse(NonLibcallCSI)) {
+ for (auto &CS : NonLibcallCSI) {
Register Reg = CS.getReg();
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI);
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 0f1a6e5f9154..f3331571fc55 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -335,17 +335,29 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
setOperationAction(ISD::SELECT, MVT::f16, Custom);
setOperationAction(ISD::BR_CC, MVT::f16, Expand);
- for (auto Op : FPOpToExpand)
- setOperationAction(Op, MVT::f16, Expand);
setOperationAction(ISD::FREM, MVT::f16, Promote);
- setOperationAction(ISD::FCEIL, MVT::f16, Promote);
- setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
- setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
- setOperationAction(ISD::FRINT, MVT::f16, Promote);
- setOperationAction(ISD::FROUND, MVT::f16, Promote);
- setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
- setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
+ setOperationAction(ISD::FCEIL, MVT::f16, Promote);
+ setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
+ setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
+ setOperationAction(ISD::FRINT, MVT::f16, Promote);
+ setOperationAction(ISD::FROUND, MVT::f16, Promote);
+ setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
+ setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
+ setOperationAction(ISD::FPOW, MVT::f16, Promote);
+ setOperationAction(ISD::FPOWI, MVT::f16, Promote);
+ setOperationAction(ISD::FCOS, MVT::f16, Promote);
+ setOperationAction(ISD::FSIN, MVT::f16, Promote);
+ setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
+ setOperationAction(ISD::FEXP, MVT::f16, Promote);
+ setOperationAction(ISD::FEXP2, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG2, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG10, MVT::f16, Promote);
+
+ // We need to custom promote this.
+ if (Subtarget.is64Bit())
+ setOperationAction(ISD::FPOWI, MVT::i32, Custom);
}
if (Subtarget.hasStdExtF()) {
@@ -676,6 +688,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMINNUM, VT, Legal);
setOperationAction(ISD::FMAXNUM, VT, Legal);
+ setOperationAction(ISD::FTRUNC, VT, Custom);
+ setOperationAction(ISD::FCEIL, VT, Custom);
+ setOperationAction(ISD::FFLOOR, VT, Custom);
+
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
@@ -924,6 +940,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP_ROUND, VT, Custom);
setOperationAction(ISD::FP_EXTEND, VT, Custom);
+ setOperationAction(ISD::FTRUNC, VT, Custom);
+ setOperationAction(ISD::FCEIL, VT, Custom);
+ setOperationAction(ISD::FFLOOR, VT, Custom);
+
for (auto CC : VFPCCToExpand)
setCondCodeAction(CC, VT, Expand);
@@ -1165,6 +1185,10 @@ bool RISCVTargetLowering::shouldSinkOperands(
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
return Operand == 1;
case Instruction::Call:
if (auto *II = dyn_cast<IntrinsicInst>(I)) {
@@ -1631,6 +1655,66 @@ static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) {
return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
}
+// Expand vector FTRUNC, FCEIL, and FFLOOR by converting to the integer domain
+// and back, taking care to avoid converting values that are NaN or already
+// correct.
+// TODO: Floor and ceil could be shorter by changing rounding mode, but we don't
+// have FRM dependencies modeled yet.
+static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ assert(VT.isVector() && "Unexpected type");
+
+ SDLoc DL(Op);
+
+ // Freeze the source since we are increasing the number of uses.
+ SDValue Src = DAG.getNode(ISD::FREEZE, DL, VT, Op.getOperand(0));
+
+ // Truncate to integer and convert back to FP.
+ MVT IntVT = VT.changeVectorElementTypeToInteger();
+ SDValue Truncated = DAG.getNode(ISD::FP_TO_SINT, DL, IntVT, Src);
+ Truncated = DAG.getNode(ISD::SINT_TO_FP, DL, VT, Truncated);
+
+ MVT SetccVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
+
+ if (Op.getOpcode() == ISD::FCEIL) {
+ // If the truncated value is greater than or equal to the original
+ // value, we've computed the ceil. Otherwise, we went the wrong way and
+ // need to increase by 1.
+ // FIXME: This should use a masked operation. Handle here or in isel?
+ SDValue Adjust = DAG.getNode(ISD::FADD, DL, VT, Truncated,
+ DAG.getConstantFP(1.0, DL, VT));
+ SDValue NeedAdjust = DAG.getSetCC(DL, SetccVT, Truncated, Src, ISD::SETOLT);
+ Truncated = DAG.getSelect(DL, VT, NeedAdjust, Adjust, Truncated);
+ } else if (Op.getOpcode() == ISD::FFLOOR) {
+ // If the truncated value is less than or equal to the original value,
+ // we've computed the floor. Otherwise, we went the wrong way and need to
+ // decrease by 1.
+ // FIXME: This should use a masked operation. Handle here or in isel?
+ SDValue Adjust = DAG.getNode(ISD::FSUB, DL, VT, Truncated,
+ DAG.getConstantFP(1.0, DL, VT));
+ SDValue NeedAdjust = DAG.getSetCC(DL, SetccVT, Truncated, Src, ISD::SETOGT);
+ Truncated = DAG.getSelect(DL, VT, NeedAdjust, Adjust, Truncated);
+ }
+
+ // Restore the original sign so that -0.0 is preserved.
+ Truncated = DAG.getNode(ISD::FCOPYSIGN, DL, VT, Truncated, Src);
+
+ // Determine the largest integer that can be represented exactly. This and
+ // values larger than it don't have any fractional bits so don't need to
+ // be converted.
+ const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
+ unsigned Precision = APFloat::semanticsPrecision(FltSem);
+ APFloat MaxVal = APFloat(FltSem);
+ MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
+ /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
+ SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
+
+ // If abs(Src) was larger than MaxVal or nan, keep it.
+ SDValue Abs = DAG.getNode(ISD::FABS, DL, VT, Src);
+ SDValue Setcc = DAG.getSetCC(DL, SetccVT, Abs, MaxValNode, ISD::SETOLT);
+ return DAG.getSelect(DL, VT, Setcc, Truncated, Src);
+}
+
static SDValue lowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
@@ -2670,6 +2754,20 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
DAG.getConstant(3, DL, VT));
return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
}
+ case ISD::FPOWI: {
+ // Custom promote f16 powi with illegal i32 integer type on RV64. Once
+ // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
+ if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
+ Op.getOperand(1).getValueType() == MVT::i32) {
+ SDLoc DL(Op);
+ SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
+ SDValue Powi =
+ DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
+ return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
+ DAG.getIntPtrConstant(0, DL));
+ }
+ return SDValue();
+ }
case ISD::FP_EXTEND: {
// RVV can only do fp_extend to types double the size as the source. We
// custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
@@ -2858,6 +2956,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
return lowerFP_TO_INT_SAT(Op, DAG);
+ case ISD::FTRUNC:
+ case ISD::FCEIL:
+ case ISD::FFLOOR:
+ return lowerFTRUNC_FCEIL_FFLOOR(Op, DAG);
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_SMAX:
@@ -9834,6 +9936,23 @@ bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
return false;
}
+bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
+ EVT VT) const {
+ if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
+ return false;
+
+ switch (FPVT.getSimpleVT().SimpleTy) {
+ case MVT::f16:
+ return Subtarget.hasStdExtZfh();
+ case MVT::f32:
+ return Subtarget.hasStdExtF();
+ case MVT::f64:
+ return Subtarget.hasStdExtD();
+ default:
+ return false;
+ }
+}
+
bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT VT) const {
VT = VT.getScalarType();
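
lowerFTRUNC_FCEIL_FFLOOR above does all of this with vector selects, so every lane takes every path and the NaN/large-value select comes last. The scalar sketch below shows the same ceil logic in plain C++ for one element, with the guard hoisted to the front only to keep the integer conversion well-defined; myCeil is an illustrative name.

    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    static double myCeil(double Src) {
      const double MaxExact = 0x1p52;    // doubles at or above 2^52 are integers
      if (!(std::fabs(Src) < MaxExact))  // also true for NaN: keep Src unchanged
        return Src;
      double Truncated = (double)(int64_t)Src; // FP_TO_SINT + SINT_TO_FP round trip
      if (Truncated < Src)                     // truncation went down: adjust by 1
        Truncated += 1.0;
      return std::copysign(Truncated, Src);    // preserve the sign of -0.0
    }

    int main() {
      std::printf("%g %g %g\n", myCeil(1.25), myCeil(-1.25), myCeil(-0.0));
      return 0;
    }
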
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 8e3d716ae919..849928eb46ae 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -514,6 +514,8 @@ public:
bool isLegalElementTypeForRVV(Type *ScalarTy) const;
+ bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
+
private:
/// RISCVCCAssignFn - This target-specific function extends the default
/// CCValAssign with additional information used to lower RISC-V calling
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index b653928ccea9..6f9cde966132 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -705,6 +705,7 @@ def PseudoLD : PseudoLoad<"ld">;
def PseudoSD : PseudoStore<"sd">;
} // Predicates = [IsRV64]
+def : InstAlias<"li $rd, $imm", (ADDI GPR:$rd, X0, simm12:$imm)>;
def : InstAlias<"mv $rd, $rs", (ADDI GPR:$rd, GPR:$rs, 0)>;
def : InstAlias<"not $rd, $rs", (XORI GPR:$rd, GPR:$rs, -1)>;
def : InstAlias<"neg $rd, $rs", (SUB GPR:$rd, X0, GPR:$rs)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 388cce00bdf3..798532d5bc44 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/Support/ErrorHandling.h"
#define GET_REGINFO_TARGET_DESC
@@ -320,3 +321,30 @@ RISCVRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
return &RISCV::VRRegClass;
return RC;
}
+
+void RISCVRegisterInfo::getOffsetOpcodes(const StackOffset &Offset,
+ SmallVectorImpl<uint64_t> &Ops) const {
+ // VLENB is the length of a vector register in bytes. We use <vscale x 8 x i8>
+ // to represent one vector register. The dwarf offset is
+ // VLENB * scalable_offset / 8.
+ assert(Offset.getScalable() % 8 == 0 && "Invalid frame offset");
+
+ // Add fixed-sized offset using existing DIExpression interface.
+ DIExpression::appendOffset(Ops, Offset.getFixed());
+
+ unsigned VLENB = getDwarfRegNum(RISCV::VLENB, true);
+ int64_t VLENBSized = Offset.getScalable() / 8;
+ if (VLENBSized > 0) {
+ Ops.push_back(dwarf::DW_OP_constu);
+ Ops.push_back(VLENBSized);
+ Ops.append({dwarf::DW_OP_bregx, VLENB, 0ULL});
+ Ops.push_back(dwarf::DW_OP_mul);
+ Ops.push_back(dwarf::DW_OP_plus);
+ } else if (VLENBSized < 0) {
+ Ops.push_back(dwarf::DW_OP_constu);
+ Ops.push_back(-VLENBSized);
+ Ops.append({dwarf::DW_OP_bregx, VLENB, 0ULL});
+ Ops.push_back(dwarf::DW_OP_mul);
+ Ops.push_back(dwarf::DW_OP_minus);
+ }
+}
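
For reference, the override above encodes "fixed + (scalable / 8) * VLENB" as a DWARF expression. The standalone sketch below rebuilds the same opcode sequence into a plain vector; the DW_OP_* constants are the real DWARF values, DIExpression::appendOffset is approximated here by a constu/plus(minus) pair, and VLENBDwarfReg is a placeholder for getDwarfRegNum(RISCV::VLENB, true).

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    enum : uint64_t {
      DW_OP_constu = 0x10, DW_OP_minus = 0x1c, DW_OP_mul = 0x1e,
      DW_OP_plus = 0x22, DW_OP_bregx = 0x92
    };

    static void getOffsetOpcodes(int64_t Fixed, int64_t Scalable,
                                 uint64_t VLENBDwarfReg,
                                 std::vector<uint64_t> &Ops) {
      // Fixed part (simplified stand-in for DIExpression::appendOffset).
      if (Fixed != 0) {
        Ops.push_back(DW_OP_constu);
        Ops.push_back(uint64_t(Fixed < 0 ? -Fixed : Fixed));
        Ops.push_back(Fixed < 0 ? DW_OP_minus : DW_OP_plus);
      }
      int64_t VLENBSized = Scalable / 8; // <vscale x 8 x i8> == one VLENB
      if (VLENBSized == 0)
        return;
      Ops.push_back(DW_OP_constu);
      Ops.push_back(uint64_t(VLENBSized < 0 ? -VLENBSized : VLENBSized));
      Ops.insert(Ops.end(), {DW_OP_bregx, VLENBDwarfReg, 0});
      Ops.push_back(DW_OP_mul);
      Ops.push_back(VLENBSized < 0 ? DW_OP_minus : DW_OP_plus);
    }

    int main() {
      std::vector<uint64_t> Ops;
      getOffsetOpcodes(/*Fixed=*/16, /*Scalable=*/24, /*VLENBDwarfReg=*/4096 + 0xC22, Ops);
      for (uint64_t Op : Ops)
        std::printf("0x%llx ", (unsigned long long)Op);
      std::printf("\n");
      return 0;
    }
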
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
index 74a5b83ff6f3..2b2bbdfbdf32 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
@@ -63,6 +63,9 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
const TargetRegisterClass *
getLargestLegalSuperClass(const TargetRegisterClass *RC,
const MachineFunction &) const override;
+
+ void getOffsetOpcodes(const StackOffset &Offset,
+ SmallVectorImpl<uint64_t> &Ops) const override;
};
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index a915a572f3b7..a56f992d320e 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -480,6 +480,8 @@ let RegAltNameIndices = [ABIRegAltName] in {
def VL : RISCVReg<0, "vl", ["vl"]>;
def VXSAT : RISCVReg<0, "vxsat", ["vxsat"]>;
def VXRM : RISCVReg<0, "vxrm", ["vxrm"]>;
+ def VLENB : RISCVReg<0, "vlenb", ["vlenb"]>,
+ DwarfRegNum<[!add(4096, SysRegVLENB.Encoding)]>;
}
foreach m = [1, 2, 4] in {
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSystemOperands.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSystemOperands.td
index 41599dd8bb3f..5a4c579dd708 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSystemOperands.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVSystemOperands.td
@@ -388,4 +388,4 @@ def : SysReg<"vxrm", 0x00A>;
def : SysReg<"vcsr", 0x00F>;
def : SysReg<"vl", 0xC20>;
def : SysReg<"vtype", 0xC21>;
-def : SysReg<"vlenb", 0xC22>;
+def SysRegVLENB: SysReg<"vlenb", 0xC22>;
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/DelaySlotFiller.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
index 7319924a24ba..259b37954183 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -53,9 +53,8 @@ namespace {
// instructions to fill delay slot.
F.getRegInfo().invalidateLiveness();
- for (MachineFunction::iterator FI = F.begin(), FE = F.end();
- FI != FE; ++FI)
- Changed |= runOnMachineBasicBlock(*FI);
+ for (MachineBasicBlock &MBB : F)
+ Changed |= runOnMachineBasicBlock(MBB);
return Changed;
}
@@ -319,8 +318,7 @@ void Filler::insertDefsUses(MachineBasicBlock::iterator MI,
SmallSet<unsigned, 32>& RegDefs,
SmallSet<unsigned, 32>& RegUses)
{
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg())
continue;
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/LeonPasses.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/LeonPasses.cpp
index fa05a41f3127..bd26710fcbab 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/LeonPasses.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/LeonPasses.cpp
@@ -42,8 +42,7 @@ bool InsertNOPLoad::runOnMachineFunction(MachineFunction &MF) {
DebugLoc DL = DebugLoc();
bool Modified = false;
- for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
- MachineBasicBlock &MBB = *MFI;
+ for (MachineBasicBlock &MBB : MF) {
for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
MachineInstr &MI = *MBBI;
unsigned Opcode = MI.getOpcode();
@@ -77,10 +76,8 @@ bool DetectRoundChange::runOnMachineFunction(MachineFunction &MF) {
Subtarget = &MF.getSubtarget<SparcSubtarget>();
bool Modified = false;
- for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
- MachineBasicBlock &MBB = *MFI;
- for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
- MachineInstr &MI = *MBBI;
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
unsigned Opcode = MI.getOpcode();
if (Opcode == SP::CALL && MI.getNumOperands() > 0) {
MachineOperand &MO = MI.getOperand(0);
@@ -129,8 +126,7 @@ bool FixAllFDIVSQRT::runOnMachineFunction(MachineFunction &MF) {
DebugLoc DL = DebugLoc();
bool Modified = false;
- for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
- MachineBasicBlock &MBB = *MFI;
+ for (MachineBasicBlock &MBB : MF) {
for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
MachineInstr &MI = *MBBI;
unsigned Opcode = MI.getOpcode();
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
index d165052ca512..a740de9123c9 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -343,19 +343,18 @@ void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const {
}
// Rewrite MBB's Live-ins.
- for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
- MBB != E; ++MBB) {
+ for (MachineBasicBlock &MBB : MF) {
for (unsigned reg = SP::I0_I1; reg <= SP::I6_I7; ++reg) {
- if (!MBB->isLiveIn(reg))
+ if (!MBB.isLiveIn(reg))
continue;
- MBB->removeLiveIn(reg);
- MBB->addLiveIn(reg - SP::I0_I1 + SP::O0_O1);
+ MBB.removeLiveIn(reg);
+ MBB.addLiveIn(reg - SP::I0_I1 + SP::O0_O1);
}
for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) {
- if (!MBB->isLiveIn(reg))
+ if (!MBB.isLiveIn(reg))
continue;
- MBB->removeLiveIn(reg);
- MBB->addLiveIn(reg - SP::I0 + SP::O0);
+ MBB.removeLiveIn(reg);
+ MBB.addLiveIn(reg - SP::I0 + SP::O0);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcMCInstLower.cpp
index 8ea317fdd453..4e7e7bb5c81b 100644
--- a/contrib/llvm-project/llvm/lib/Target/Sparc/SparcMCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Sparc/SparcMCInstLower.cpp
@@ -97,8 +97,7 @@ void llvm::LowerSparcMachineInstrToMCInst(const MachineInstr *MI,
OutMI.setOpcode(MI->getOpcode());
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
MCOperand MCOp = LowerOperand(MI, MO, AP);
if (MCOp.isValid())
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
index ac94570e568f..631cbff303e8 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -144,8 +144,7 @@ Reference SystemZElimCompare::getRegReferences(MachineInstr &MI, unsigned Reg) {
if (MI.isDebugInstr())
return Ref;
- for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
- const MachineOperand &MO = MI.getOperand(I);
+ for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg()) {
if (Register MOReg = MO.getReg()) {
if (TRI->regsOverlap(MOReg, Reg)) {
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index d11d118fb8ee..2f7cdfcf7bde 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -270,8 +270,8 @@ bool SystemZELFFrameLowering::spillCalleeSavedRegisters(
// Make sure all call-saved GPRs are included as operands and are
// marked as live on entry.
- for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
- unsigned Reg = CSI[I].getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
if (SystemZ::GR64BitRegClass.contains(Reg))
addSavedGPR(MBB, MIB, Reg, true);
}
@@ -283,16 +283,16 @@ bool SystemZELFFrameLowering::spillCalleeSavedRegisters(
}
// Save FPRs/VRs in the normal TargetInstrInfo way.
- for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
- unsigned Reg = CSI[I].getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
if (SystemZ::FP64BitRegClass.contains(Reg)) {
MBB.addLiveIn(Reg);
- TII->storeRegToStackSlot(MBB, MBBI, Reg, true, CSI[I].getFrameIdx(),
+ TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(),
&SystemZ::FP64BitRegClass, TRI);
}
if (SystemZ::VR128BitRegClass.contains(Reg)) {
MBB.addLiveIn(Reg);
- TII->storeRegToStackSlot(MBB, MBBI, Reg, true, CSI[I].getFrameIdx(),
+ TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(),
&SystemZ::VR128BitRegClass, TRI);
}
}
@@ -313,13 +313,13 @@ bool SystemZELFFrameLowering::restoreCalleeSavedRegisters(
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
// Restore FPRs/VRs in the normal TargetInstrInfo way.
- for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
- unsigned Reg = CSI[I].getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
if (SystemZ::FP64BitRegClass.contains(Reg))
- TII->loadRegFromStackSlot(MBB, MBBI, Reg, CSI[I].getFrameIdx(),
+ TII->loadRegFromStackSlot(MBB, MBBI, Reg, I.getFrameIdx(),
&SystemZ::FP64BitRegClass, TRI);
if (SystemZ::VR128BitRegClass.contains(Reg))
- TII->loadRegFromStackSlot(MBB, MBBI, Reg, CSI[I].getFrameIdx(),
+ TII->loadRegFromStackSlot(MBB, MBBI, Reg, I.getFrameIdx(),
&SystemZ::VR128BitRegClass, TRI);
}
@@ -345,8 +345,8 @@ bool SystemZELFFrameLowering::restoreCalleeSavedRegisters(
MIB.addImm(RestoreGPRs.GPROffset);
// Do a second scan adding regs as being defined by instruction
- for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
- unsigned Reg = CSI[I].getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
if (Reg != RestoreGPRs.LowGPR && Reg != RestoreGPRs.HighGPR &&
SystemZ::GR64BitRegClass.contains(Reg))
MIB.addReg(Reg, RegState::ImplicitDefine);
@@ -965,24 +965,24 @@ bool SystemZXPLINKFrameLowering::spillCalleeSavedRegisters(
// Make sure all call-saved GPRs are included as operands and are
// marked as live on entry.
auto &GRRegClass = SystemZ::GR64BitRegClass;
- for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
- unsigned Reg = CSI[I].getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
if (GRRegClass.contains(Reg))
addSavedGPR(MBB, MIB, Reg, true);
}
}
// Spill FPRs to the stack in the normal TargetInstrInfo way
- for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
- unsigned Reg = CSI[I].getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
if (SystemZ::FP64BitRegClass.contains(Reg)) {
MBB.addLiveIn(Reg);
- TII->storeRegToStackSlot(MBB, MBBI, Reg, true, CSI[I].getFrameIdx(),
+ TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(),
&SystemZ::FP64BitRegClass, TRI);
}
if (SystemZ::VR128BitRegClass.contains(Reg)) {
MBB.addLiveIn(Reg);
- TII->storeRegToStackSlot(MBB, MBBI, Reg, true, CSI[I].getFrameIdx(),
+ TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(),
&SystemZ::VR128BitRegClass, TRI);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
index 6fddb4f81c41..af219da79c32 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -29,7 +29,18 @@ public:
create(const SystemZSubtarget &STI);
// Override TargetFrameLowering.
- bool isFPCloseToIncomingSP() const override { return false; }
+ bool allocateScavengingFrameIndexesNearIncomingSP(
+ const MachineFunction &MF) const override {
+ // SystemZ wants normal register scavenging slots, as close to the stack or
+ // frame pointer as possible.
+ // The default implementation assumes an x86-like layout, where the frame
+ // pointer is at the opposite end of the frame from the stack pointer.
+ // This means that when frame pointer elimination is disabled, the slots
+ // end up as close as possible to the incoming stack pointer, which is the
+ // opposite of what we want on SystemZ.
+ return false;
+ }
+
bool hasReservedCallFrame(const MachineFunction &MF) const override;
MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
@@ -43,7 +54,6 @@ public:
SystemZELFFrameLowering();
// Override TargetFrameLowering.
- bool isFPCloseToIncomingSP() const override { return false; }
bool
assignCalleeSavedSpillSlots(MachineFunction &MF,
const TargetRegisterInfo *TRI,
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 2bf80882fa61..e80496e37781 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -203,8 +203,8 @@ void SystemZInstrInfo::expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode,
Size, MI.getOperand(1).isKill(), MI.getOperand(1).isUndef());
// Keep the remaining operands as-is.
- for (unsigned I = 2; I < MI.getNumOperands(); ++I)
- MIB.add(MI.getOperand(I));
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), 2))
+ MIB.add(MO);
MI.eraseFromParent();
}
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp
index ef39f80a94ef..d2932de5a6ea 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp
@@ -93,10 +93,8 @@ MCOperand SystemZMCInstLower::lowerOperand(const MachineOperand &MO) const {
void SystemZMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
- for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
- const MachineOperand &MO = MI->getOperand(I);
+ for (const MachineOperand &MO : MI->operands())
// Ignore all implicit register operands.
if (!MO.isReg() || !MO.isImplicit())
OutMI.addOperand(lowerOperand(MO));
- }
}
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp
index 1fe9423e01b8..1d8c3d514bfb 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp
@@ -23,14 +23,6 @@ using namespace llvm;
#define DEBUG_TYPE "ve-asmprinter"
-// The generated AsmMatcher VEGenAsmWriter uses "VE" as the target
-// namespace.
-namespace llvm {
-namespace VE {
-using namespace VE;
-}
-} // namespace llvm
-
#define GET_INSTRUCTION_NAME
#define PRINT_ALIAS_INSTR
#include "VEGenAsmWriter.inc"
@@ -62,13 +54,10 @@ void VEInstPrinter::printOperand(const MCInst *MI, int OpNum,
}
if (MO.isImm()) {
- switch (MI->getOpcode()) {
- default:
- // Expects signed 32bit literals
- int32_t TruncatedImm = static_cast<int32_t>(MO.getImm());
- O << TruncatedImm;
- return;
- }
+ // Expects signed 32bit literals.
+ int32_t TruncatedImm = static_cast<int32_t>(MO.getImm());
+ O << TruncatedImm;
+ return;
}
assert(MO.isExpr() && "Unknown operand kind in printOperand");
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp
index ddcfb9da8249..46846edfeafb 100644
--- a/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/VE/VEInstrInfo.cpp
@@ -942,11 +942,11 @@ bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MachineInstrBuilder MIB =
BuildMI(*MBB, MI, DL, get(VE::SVMmi), Dest).addReg(VMZ).addImm(Imm);
MachineInstr *Inst = MIB.getInstr();
- MI.eraseFromParent();
if (KillSrc) {
const TargetRegisterInfo *TRI = &getRegisterInfo();
Inst->addRegisterKilled(MI.getOperand(1).getReg(), TRI, true);
}
+ MI.eraseFromParent();
return true;
}
case VE::VFMKyal:
@@ -956,6 +956,7 @@ bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case VE::VFMKSyvl:
case VE::VFMKSyvyl:
expandPseudoVFMK(*this, MI);
+ return true;
}
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index 80abccd74782..7b70d99b5f52 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -368,8 +368,8 @@ FunctionPass *WebAssemblyPassConfig::createTargetRegisterAllocator(bool) {
return nullptr; // No reg alloc
}
-static void checkSanityForEHAndSjLj(const TargetMachine *TM) {
- // Sanity checking related to -exception-model
+static void basicCheckForEHAndSjLj(const TargetMachine *TM) {
+ // Basic correctness checking related to -exception-model
if (TM->Options.ExceptionModel != ExceptionHandling::None &&
TM->Options.ExceptionModel != ExceptionHandling::Wasm)
report_fatal_error("-exception-model should be either 'none' or 'wasm'");
@@ -431,7 +431,7 @@ void WebAssemblyPassConfig::addIRPasses() {
if (getOptLevel() != CodeGenOpt::None)
addPass(createWebAssemblyOptimizeReturned());
- checkSanityForEHAndSjLj(TM);
+ basicCheckForEHAndSjLj(TM);
// If exception handling is not enabled and setjmp/longjmp handling is
// enabled, we lower invokes into calls and delete unreachable landingpad
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index d4f39b571394..3df48b466d07 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -148,7 +148,7 @@ public:
AlignBranchType.addKind(X86::AlignBranchJcc);
AlignBranchType.addKind(X86::AlignBranchJmp);
}
- // Allow overriding defaults set by master flag
+ // Allow overriding defaults set by main flag
if (X86AlignBranchBoundary.getNumOccurrences())
AlignBoundary = assumeAligned(X86AlignBranchBoundary);
if (X86AlignBranch.getNumOccurrences())
@@ -1452,9 +1452,7 @@ public:
unsigned NumDefCFAOffsets = 0;
int MinAbsOffset = std::numeric_limits<int>::max();
- for (unsigned i = 0, e = Instrs.size(); i != e; ++i) {
- const MCCFIInstruction &Inst = Instrs[i];
-
+ for (const MCCFIInstruction &Inst : Instrs) {
switch (Inst.getOperation()) {
default:
// Any other CFI directives indicate a frame that we aren't prepared
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86DomainReassignment.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86DomainReassignment.cpp
index a2ae6345c006..9826bf4bf861 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86DomainReassignment.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86DomainReassignment.cpp
@@ -186,8 +186,8 @@ public:
TII->getRegClass(TII->get(DstOpcode), 0, MRI->getTargetRegisterInfo(),
*MBB->getParent()));
MachineInstrBuilder Bld = BuildMI(*MBB, MI, DL, TII->get(DstOpcode), Reg);
- for (unsigned Idx = 1, End = MI->getNumOperands(); Idx < End; ++Idx)
- Bld.add(MI->getOperand(Idx));
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands()))
+ Bld.add(MO);
BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::COPY))
.add(MI->getOperand(0))
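
Many of the loop rewrites in this import use llvm::drop_begin to iterate a range while skipping the leading element(s), here the def operand at index 0. A self-contained approximation of that idiom built only from the standard library (this is not LLVM's implementation, just the same shape; assumes C++14 or later):

#include <cstddef>
#include <iostream>
#include <iterator>
#include <vector>

// Rough stand-in for llvm::drop_begin: view over a range minus its first N elements.
template <typename Range>
auto drop_begin_view(Range &R, std::size_t N = 1) {
  struct View {
    typename Range::iterator First, Last;
    auto begin() { return First; }
    auto end() { return Last; }
  };
  return View{std::next(R.begin(), N), R.end()};
}

int main() {
  std::vector<int> Operands{0, 10, 20, 30}; // pretend index 0 is the def
  for (int Use : drop_begin_view(Operands))
    std::cout << Use << '\n';               // prints 10, 20, 30
  return 0;
}
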
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86DynAllocaExpander.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86DynAllocaExpander.cpp
index df8df1e3a65d..c8ceebb8b8e6 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86DynAllocaExpander.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86DynAllocaExpander.cpp
@@ -212,6 +212,12 @@ void X86DynAllocaExpander::lower(MachineInstr *MI, Lowering L) {
bool Is64BitAlloca = MI->getOpcode() == X86::DYN_ALLOCA_64;
assert(SlotSize == 4 || SlotSize == 8);
+ Optional<MachineFunction::DebugInstrOperandPair> InstrNum = None;
+ if (unsigned Num = MI->peekDebugInstrNum()) {
+ // Operand 2 of DYN_ALLOCAs contains the stack def.
+ InstrNum = {Num, 2};
+ }
+
switch (L) {
case TouchAndSub: {
assert(Amount >= SlotSize);
@@ -251,7 +257,7 @@ void X86DynAllocaExpander::lower(MachineInstr *MI, Lowering L) {
// Do the probe.
STI->getFrameLowering()->emitStackProbe(*MBB->getParent(), *MBB, MI, DL,
- /*InProlog=*/false);
+ /*InProlog=*/false, InstrNum);
} else {
// Sub
BuildMI(*MBB, I, DL,
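
The new block records an (instruction number, operand index) pair for the DYN_ALLOCA's stack-pointer def so that the frame-lowering code reached later can register a substitution to whichever instruction ends up defining SP. The bookkeeping reduces to a map of such pairs; the sketch below is a standalone model of that idea, not LLVM's MachineFunction API, and the numbers are made up:

#include <cstdio>
#include <map>
#include <utility>

// (instruction number, operand index) -- mirrors the shape of a
// DebugInstrOperandPair, not LLVM's actual type.
using InstrOperandPair = std::pair<unsigned, unsigned>;

int main() {
  std::map<InstrOperandPair, InstrOperandPair> Substitutions;

  // Old DYN_ALLOCA: instruction #5, SP defined by operand 2.
  InstrOperandPair OldRef{5, 2};
  // After expansion, SP is defined by operand 0 of the new SUB, instruction #9.
  Substitutions[OldRef] = {9, 0};

  InstrOperandPair New = Substitutions.at(OldRef);
  std::printf("instr %u op %u -> instr %u op %u\n",
              OldRef.first, OldRef.second, New.first, New.second);
  return 0;
}
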
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 01dc509df795..93bc23006dc4 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -209,10 +209,8 @@ void X86ExpandPseudo::expandCALL_RVMARKER(MachineBasicBlock &MBB,
llvm_unreachable("unexpected opcode");
OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
- unsigned OpStart = 1;
bool RAXImplicitDead = false;
- for (; OpStart < MI.getNumOperands(); ++OpStart) {
- MachineOperand &Op = MI.getOperand(OpStart);
+ for (MachineOperand &Op : llvm::drop_begin(MI.operands())) {
// RAX may be 'implicit dead', if there are no other users of the return
// value. We introduce a new use, so change it to 'implicit def'.
if (Op.isReg() && Op.isImplicit() && Op.isDead() &&
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FixupLEAs.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FixupLEAs.cpp
index 9a63cffe0a09..4730b936ec1f 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FixupLEAs.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FixupLEAs.cpp
@@ -278,10 +278,9 @@ FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
RegUsageState RegUsage = RU_NotUsed;
MachineInstr &MI = *I;
- for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
- MachineOperand &opnd = MI.getOperand(i);
- if (opnd.isReg() && opnd.getReg() == p.getReg()) {
- if (opnd.isDef())
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.getReg() == p.getReg()) {
+ if (MO.isDef())
return RU_Write;
RegUsage = RU_Read;
}

diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FloatingPoint.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FloatingPoint.cpp
index 60e1b37ed61c..4d9160f35226 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FloatingPoint.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -446,11 +446,9 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
// Get dead variables list now because the MI pointer may be deleted as part
// of processing!
SmallVector<unsigned, 8> DeadRegs;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands())
if (MO.isReg() && MO.isDead())
DeadRegs.push_back(MO.getReg());
- }
switch (FPInstClass) {
case X86II::ZeroArgFP: handleZeroArgFP(I); break;
@@ -1672,8 +1670,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
// Collect all FP registers (register operands with constraints "t", "u",
// and "f") to kill afer the instruction.
unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &Op = MI.getOperand(i);
+ for (const MachineOperand &Op : MI.operands()) {
if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
continue;
unsigned FPReg = getFPReg(Op);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp
index bd780273509f..c29ae9f6af4c 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -465,13 +465,11 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(
// Add callee saved registers to move list.
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
- if (CSI.empty()) return;
// Calculate offsets.
- for (std::vector<CalleeSavedInfo>::const_iterator
- I = CSI.begin(), E = CSI.end(); I != E; ++I) {
- int64_t Offset = MFI.getObjectOffset(I->getFrameIdx());
- unsigned Reg = I->getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
+ unsigned Reg = I.getReg();
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
if (IsPrologue) {
@@ -484,10 +482,10 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(
}
}
-void X86FrameLowering::emitStackProbe(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, bool InProlog) const {
+void X86FrameLowering::emitStackProbe(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
+ Optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
if (STI.isTargetWindowsCoreCLR()) {
if (InProlog) {
@@ -497,10 +495,14 @@ void X86FrameLowering::emitStackProbe(MachineFunction &MF,
emitStackProbeInline(MF, MBB, MBBI, DL, false);
}
} else {
- emitStackProbeCall(MF, MBB, MBBI, DL, InProlog);
+ emitStackProbeCall(MF, MBB, MBBI, DL, InProlog, InstrNum);
}
}
+bool X86FrameLowering::stackProbeFunctionModifiesSP() const {
+ return STI.isOSWindows() && !STI.isTargetWin64();
+}
+
void X86FrameLowering::inlineStackProbe(MachineFunction &MF,
MachineBasicBlock &PrologMBB) const {
auto Where = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
@@ -971,11 +973,10 @@ void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
}
}
-void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL,
- bool InProlog) const {
+void X86FrameLowering::emitStackProbeCall(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
+ Optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
// FIXME: Add indirect thunk support and remove this.
@@ -1015,6 +1016,7 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
.addReg(SP, RegState::Define | RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+ MachineInstr *ModInst = CI;
if (STI.isTargetWin64() || !STI.isOSWindows()) {
// MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
// MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
@@ -1022,9 +1024,27 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
// adjusting %rsp.
// All other platforms do not specify a particular ABI for the stack probe
// function, so we arbitrarily define it to not adjust %esp/%rsp itself.
- BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Uses64BitFramePtr)), SP)
- .addReg(SP)
- .addReg(AX);
+ ModInst =
+ BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Uses64BitFramePtr)), SP)
+ .addReg(SP)
+ .addReg(AX);
+ }
+
+ // DebugInfo variable locations -- if there's an instruction number for the
+ // allocation (i.e., DYN_ALLOC_*), substitute it for the instruction that
+ // modifies SP.
+ if (InstrNum) {
+ if (STI.isTargetWin64() || !STI.isOSWindows()) {
+ // Label destination operand of the subtract.
+ MF.makeDebugValueSubstitution(*InstrNum,
+ {ModInst->getDebugInstrNum(), 0});
+ } else {
+ // Label the call. The operand number is the penultimate operand, zero
+ // based.
+ unsigned SPDefOperand = ModInst->getNumOperands() - 2;
+ MF.makeDebugValueSubstitution(
+ *InstrNum, {ModInst->getDebugInstrNum(), SPDefOperand});
+ }
}
if (InProlog) {
@@ -2652,8 +2672,8 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(
DebugLoc DL = MBB.findDebugLoc(MI);
// Reload XMMs from stack frame.
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
if (X86::GR64RegClass.contains(Reg) ||
X86::GR32RegClass.contains(Reg))
continue;
@@ -2664,13 +2684,13 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(
VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
- TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
+ TII.loadRegFromStackSlot(MBB, MI, Reg, I.getFrameIdx(), RC, TRI);
}
// POP GPRs.
unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
if (!X86::GR64RegClass.contains(Reg) &&
!X86::GR32RegClass.contains(Reg))
continue;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.h b/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.h
index 6309b8a066c4..e18be0d26321 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.h
@@ -13,6 +13,7 @@
#ifndef LLVM_LIB_TARGET_X86_X86FRAMELOWERING_H
#define LLVM_LIB_TARGET_X86_X86FRAMELOWERING_H
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Support/TypeSize.h"
@@ -51,9 +52,14 @@ public:
/// Emit target stack probe code. This is required for all
/// large stack allocations on Windows. The caller is required to materialize
/// the number of bytes to probe in RAX/EAX.
- void emitStackProbe(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
- bool InProlog) const;
+ /// \p InstrNum optionally contains a debug-info instruction number for the
+ /// new stack pointer.
+ void emitStackProbe(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
+ Optional<MachineFunction::DebugInstrOperandPair> InstrNum = None) const;
+
+ bool stackProbeFunctionModifiesSP() const override;
/// Replace a StackProbe inline-stub with the actual probe code inline.
void inlineStackProbe(MachineFunction &MF,
@@ -198,9 +204,10 @@ private:
uint64_t calculateMaxStackAlign(const MachineFunction &MF) const;
/// Emit target stack probe as a call to a helper function
- void emitStackProbeCall(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
- bool InProlog) const;
+ void emitStackProbeCall(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
+ Optional<MachineFunction::DebugInstrOperandPair> InstrNum) const;
/// Emit target stack probe as an inline sequence.
void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB,
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
index 17d14053d804..62b2387396be 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -23190,6 +23190,10 @@ static SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
bool X86TargetLowering::isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
+ // We don't need to replace SQRT with RSQRT for half type.
+ if (VT.getScalarType() == MVT::f16)
+ return true;
+
// We never want to use both SQRT and RSQRT instructions for the same input.
if (DAG.getNodeIfExists(X86ISD::FRSQRT, DAG.getVTList(VT), Op))
return false;
@@ -23228,11 +23232,15 @@ SDValue X86TargetLowering::getSqrtEstimate(SDValue Op,
UseOneConstNR = false;
// There is no FSQRT for 512-bits, but there is RSQRT14.
unsigned Opcode = VT == MVT::v16f32 ? X86ISD::RSQRT14 : X86ISD::FRSQRT;
- return DAG.getNode(Opcode, DL, VT, Op);
+ SDValue Estimate = DAG.getNode(Opcode, DL, VT, Op);
+ if (RefinementSteps == 0 && !Reciprocal)
+ Estimate = DAG.getNode(ISD::FMUL, DL, VT, Op, Estimate);
+ return Estimate;
}
if (VT.getScalarType() == MVT::f16 && isTypeLegal(VT) &&
Subtarget.hasFP16()) {
+ assert(Reciprocal && "Don't replace SQRT with RSQRT for half type");
if (RefinementSteps == ReciprocalEstimate::Unspecified)
RefinementSteps = 0;
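
The extra FMUL exploits the identity sqrt(x) = x * rsqrt(x) for positive x, so a reciprocal-square-root estimate can also serve a plain square-root query when no refinement steps are requested. A quick numeric check of the identity in portable C++, using 1/sqrt as a stand-in for the hardware estimate:

#include <cmath>
#include <cstdio>

int main() {
  for (float X : {0.25f, 2.0f, 9.0f, 1e6f}) {
    float Rsqrt = 1.0f / std::sqrt(X);   // stand-in for the RSQRT estimate
    float SqrtFromRsqrt = X * Rsqrt;     // what the added FMUL computes
    std::printf("x=%g  sqrt=%g  x*rsqrt=%g\n",
                X, std::sqrt(X), SqrtFromRsqrt);
  }
  return 0;
}
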
@@ -45680,7 +45688,7 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
if (is64BitFP && !Subtarget.is64Bit()) {
// On a 32-bit target, we cannot bitcast the 64-bit float to a
// 64-bit integer, since that's not a legal type. Since
- // OnesOrZeroesF is all ones of all zeroes, we don't need all the
+ // OnesOrZeroesF is all ones or all zeroes, we don't need all the
// bits, but can do this little dance to extract the lowest 32 bits
// and work with those going forward.
SDValue Vector64 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64,
@@ -46577,6 +46585,59 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
return Ret;
}
+static SDValue foldMaskedMergeImpl(SDValue And0_L, SDValue And0_R,
+ SDValue And1_L, SDValue And1_R, SDLoc DL,
+ SelectionDAG &DAG) {
+ if (!isBitwiseNot(And0_L, true) || !And0_L->hasOneUse())
+ return SDValue();
+ SDValue NotOp = And0_L->getOperand(0);
+ if (NotOp == And1_R)
+ std::swap(And1_R, And1_L);
+ if (NotOp != And1_L)
+ return SDValue();
+
+ // (~(NotOp) & And0_R) | (NotOp & And1_R)
+ // --> ((And0_R ^ And1_R) & NotOp) ^ And1_R
+ EVT VT = And1_L->getValueType(0);
+ SDValue Freeze_And0_R = DAG.getNode(ISD::FREEZE, SDLoc(), VT, And0_R);
+ SDValue Xor0 = DAG.getNode(ISD::XOR, DL, VT, And1_R, Freeze_And0_R);
+ SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor0, NotOp);
+ SDValue Xor1 = DAG.getNode(ISD::XOR, DL, VT, And, Freeze_And0_R);
+ return Xor1;
+}
+
+/// Fold "masked merge" expressions like `(m & x) | (~m & y)` into the
+/// equivalent `((x ^ y) & m) ^ y)` pattern.
+/// This is typically a better representation for targets without a fused
+/// "and-not" operation. This function is intended to be called from a
+/// `TargetLowering::PerformDAGCombine` callback on `ISD::OR` nodes.
+static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG) {
+ // Note that masked-merge variants using XOR or ADD expressions are
+ // normalized to OR by InstCombine so we only check for OR.
+ assert(Node->getOpcode() == ISD::OR && "Must be called with ISD::OR node");
+ SDValue N0 = Node->getOperand(0);
+ if (N0->getOpcode() != ISD::AND || !N0->hasOneUse())
+ return SDValue();
+ SDValue N1 = Node->getOperand(1);
+ if (N1->getOpcode() != ISD::AND || !N1->hasOneUse())
+ return SDValue();
+
+ SDLoc DL(Node);
+ SDValue N00 = N0->getOperand(0);
+ SDValue N01 = N0->getOperand(1);
+ SDValue N10 = N1->getOperand(0);
+ SDValue N11 = N1->getOperand(1);
+ if (SDValue Result = foldMaskedMergeImpl(N00, N01, N10, N11, DL, DAG))
+ return Result;
+ if (SDValue Result = foldMaskedMergeImpl(N01, N00, N10, N11, DL, DAG))
+ return Result;
+ if (SDValue Result = foldMaskedMergeImpl(N10, N11, N00, N01, DL, DAG))
+ return Result;
+ if (SDValue Result = foldMaskedMergeImpl(N11, N10, N00, N01, DL, DAG))
+ return Result;
+ return SDValue();
+}
+
static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -46670,6 +46731,11 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
return Res;
}
+ // We should fold "masked merge" patterns when `andn` is not available.
+ if (!Subtarget.hasBMI() && VT.isScalarInteger() && VT != MVT::i1)
+ if (SDValue R = foldMaskedMerge(N, DAG))
+ return R;
+
return SDValue();
}
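
foldMaskedMerge rewrites (m & x) | (~m & y) into ((x ^ y) & m) ^ y, which needs no AND-NOT and therefore pays off when BMI's ANDN is unavailable. The two forms agree for every bit pattern; a brute-force check over 8-bit values (plain C++, independent of the SelectionDAG machinery):

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned M = 0; M < 256; ++M)
    for (unsigned X = 0; X < 256; ++X)
      for (unsigned Y = 0; Y < 256; ++Y) {
        uint8_t Merge = (M & X) | (~M & Y);   // original masked-merge form
        uint8_t Folded = ((X ^ Y) & M) ^ Y;   // folded form emitted above
        assert(Merge == Folded);
      }
  return 0;
}
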
@@ -48504,20 +48570,50 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL,
SDValue LHS = Src.getOperand(0).getOperand(0);
SDValue RHS = Src.getOperand(0).getOperand(1);
- unsigned ExtOpc = LHS.getOpcode();
- if ((ExtOpc != ISD::SIGN_EXTEND && ExtOpc != ISD::ZERO_EXTEND) ||
- RHS.getOpcode() != ExtOpc)
- return SDValue();
-
- // Peek through the extends.
- LHS = LHS.getOperand(0);
- RHS = RHS.getOperand(0);
-
- // Ensure the input types match.
- if (LHS.getValueType() != VT || RHS.getValueType() != VT)
- return SDValue();
+ // Count leading sign/zero bits on both inputs - if there are enough then
+ // truncation back to vXi16 will be cheap - either as a pack/shuffle
+ // sequence or using AVX512 truncations. If the inputs are sext/zext then the
+ // truncations may actually be free by peeking through to the ext source.
+ auto IsSext = [&DAG](SDValue V) {
+ return DAG.ComputeMinSignedBits(V) <= 16;
+ };
+ auto IsZext = [&DAG](SDValue V) {
+ return DAG.computeKnownBits(V).countMaxActiveBits() <= 16;
+ };
- unsigned Opc = ExtOpc == ISD::SIGN_EXTEND ? ISD::MULHS : ISD::MULHU;
+ bool IsSigned = IsSext(LHS) && IsSext(RHS);
+ bool IsUnsigned = IsZext(LHS) && IsZext(RHS);
+ if (!IsSigned && !IsUnsigned)
+ return SDValue();
+
+ // Check if both inputs are extensions, which will be removed by truncation.
+ bool IsTruncateFree = (LHS.getOpcode() == ISD::SIGN_EXTEND ||
+ LHS.getOpcode() == ISD::ZERO_EXTEND) &&
+ (RHS.getOpcode() == ISD::SIGN_EXTEND ||
+ RHS.getOpcode() == ISD::ZERO_EXTEND) &&
+ LHS.getOperand(0).getScalarValueSizeInBits() <= 16 &&
+ RHS.getOperand(0).getScalarValueSizeInBits() <= 16;
+
+ // For AVX2+ targets, with the upper bits known zero, we can perform MULHU on
+ // the (bitcasted) inputs directly, and then cheaply pack/truncate the result
+ // (upper elts will be zero). Don't attempt this with just AVX512F as MULHU
+ // will have to split anyway.
+ unsigned InSizeInBits = InVT.getSizeInBits();
+ if (IsUnsigned && !IsTruncateFree && Subtarget.hasInt256() &&
+ !(Subtarget.hasAVX512() && !Subtarget.hasBWI() && VT.is256BitVector()) &&
+ (InSizeInBits % 16) == 0) {
+ EVT BCVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
+ InVT.getSizeInBits() / 16);
+ SDValue Res = DAG.getNode(ISD::MULHU, DL, BCVT, DAG.getBitcast(BCVT, LHS),
+ DAG.getBitcast(BCVT, RHS));
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getBitcast(InVT, Res));
+ }
+
+ // Truncate back to source type.
+ LHS = DAG.getNode(ISD::TRUNCATE, DL, VT, LHS);
+ RHS = DAG.getNode(ISD::TRUNCATE, DL, VT, RHS);
+
+ unsigned Opc = IsSigned ? ISD::MULHS : ISD::MULHU;
return DAG.getNode(Opc, DL, VT, LHS, RHS);
}
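
combinePMULH now only requires each input to have at most 16 significant bits: for such values, truncating to 16 bits and taking the 16x16 high half gives the same result as the wide multiply-and-shift the DAG started from. A scalar illustration (standard C++; it assumes the usual arithmetic right shift for negative values, which C++20 guarantees):

#include <cassert>
#include <cstdint>

// High half of a 16x16 multiply -- what the MULHS/MULHU nodes compute.
static int16_t mulhs(int16_t A, int16_t B) {
  return static_cast<int16_t>((int32_t(A) * int32_t(B)) >> 16);
}
static uint16_t mulhu(uint16_t A, uint16_t B) {
  return static_cast<uint16_t>((uint32_t(A) * uint32_t(B)) >> 16);
}

int main() {
  // 32-bit lanes whose values are known to fit in 16 bits (enough sign bits /
  // leading zeros, as ComputeMinSignedBits / countMaxActiveBits would report).
  int32_t X = -1234, Y = 567;
  int16_t WideSigned = static_cast<int16_t>((X * Y) >> 16); // trunc((x*y)>>16)
  assert(WideSigned == mulhs(int16_t(X), int16_t(Y)));

  uint32_t U = 60000, V = 1234;
  uint16_t WideUnsigned = static_cast<uint16_t>((U * V) >> 16);
  assert(WideUnsigned == mulhu(uint16_t(U), uint16_t(V)));
  return 0;
}
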
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
index 8aee96e1c504..1db83033ba35 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -12937,8 +12937,8 @@ def : Pat<(v16i32 (X86vzmovl
(iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
-def : Pat<(v8i16 (X86vzmovl (v8i16 (scalar_to_vector (i16 (trunc GR32:$src)))))),
- (VMOVW2SHrr GR32:$src)>;
+def : Pat<(v8i16 (X86vzmovl (scalar_to_vector (i16 GR16:$src)))),
+ (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
// AVX 128-bit movw instruction write zeros in the high 128-bit part.
def : Pat<(v8i16 (X86vzload16 addr:$src)),
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp
index 639aa5199ea5..bb5637a31947 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -1163,8 +1163,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
/// True if MI has a condition code def, e.g. EFLAGS, that is not marked dead.
bool X86InstrInfo::hasLiveCondCodeDef(MachineInstr &MI) const {
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.isDef() &&
MO.getReg() == X86::EFLAGS && !MO.isDead()) {
return true;
@@ -5676,10 +5675,8 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
MachineOperand &MO = MI.getOperand(i + 2);
MIB.add(MO);
}
- for (unsigned i = NumOps + 2, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), NumOps + 2))
MIB.add(MO);
- }
updateOperandRegConstraints(MF, *NewMI, TII);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterBanks.td b/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterBanks.td
index 74c515850ab1..91a497252595 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterBanks.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86RegisterBanks.td
@@ -1,4 +1,4 @@
-//=- X86RegisterBank.td - Describe the AArch64 Banks -----*- tablegen -*-=//
+//=- X86RegisterBank.td - Describe the X86 Banks -------------*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 06dacb638d16..869762b35196 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1584,54 +1584,98 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i8, 1 },
// Mask sign extend has an instruction.
- { ISD::SIGN_EXTEND, MVT::v2i8, MVT::v2i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v2i16, MVT::v2i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v4i8, MVT::v4i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v8i8, MVT::v8i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v32i8, MVT::v32i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v64i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v64i8, MVT::v64i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v2i8, MVT::v2i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v2i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v2i16, MVT::v2i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v2i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i8, MVT::v4i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v4i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v4i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i8, MVT::v8i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v8i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v32i8, MVT::v32i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v64i8, MVT::v64i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v64i1, 1 },
// Mask zero extend is a sext + shift.
- { ISD::ZERO_EXTEND, MVT::v2i8, MVT::v2i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v2i16, MVT::v2i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v4i8, MVT::v4i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v8i8, MVT::v8i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v32i8, MVT::v32i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v64i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v64i8, MVT::v64i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v2i8, MVT::v2i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v2i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v2i16, MVT::v2i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v2i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i8, MVT::v4i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v4i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v4i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i8, MVT::v8i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v8i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v32i8, MVT::v32i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v64i8, MVT::v64i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v64i1, 2 },
+
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 2 },
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v16i8, 2 },
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 2 },
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v8i16, 2 },
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 2 },
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v16i8, 2 },
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 2 },
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v8i16, 2 },
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 2 },
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v16i8, 2 },
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 2 },
+ { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 2 },
+ { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 2 },
+ { ISD::TRUNCATE, MVT::v32i1, MVT::v32i8, 2 },
+ { ISD::TRUNCATE, MVT::v32i1, MVT::v32i16, 2 },
+ { ISD::TRUNCATE, MVT::v64i1, MVT::v64i8, 2 },
+ { ISD::TRUNCATE, MVT::v64i1, MVT::v32i16, 2 },
{ ISD::TRUNCATE, MVT::v32i8, MVT::v32i16, 2 },
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 }, // widen to zmm
- { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 2 }, // widen to zmm
- { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 2 }, // widen to zmm
{ ISD::TRUNCATE, MVT::v2i8, MVT::v2i16, 2 }, // vpmovwb
- { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 2 }, // widen to zmm
- { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 2 }, // widen to zmm
{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 2 }, // vpmovwb
- { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 2 }, // widen to zmm
- { ISD::TRUNCATE, MVT::v8i1, MVT::v16i8, 2 }, // widen to zmm
- { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 2 }, // widen to zmm
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2 }, // vpmovwb
- { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 2 }, // widen to zmm
- { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 2 }, // widen to zmm
- { ISD::TRUNCATE, MVT::v32i1, MVT::v32i8, 2 }, // widen to zmm
- { ISD::TRUNCATE, MVT::v32i1, MVT::v32i16, 2 },
- { ISD::TRUNCATE, MVT::v64i1, MVT::v32i16, 2 },
- { ISD::TRUNCATE, MVT::v64i1, MVT::v64i8, 2 },
};
static const TypeConversionCostTblEntry AVX512DQConversionTbl[] = {
+ // Mask sign extend has an instruction.
+ { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v2i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v16i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i1, 1 },
+
+ // Mask zero extend is a sext + shift.
+ { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v2i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v16i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i1, 2 },
+
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i64, 2 },
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v4i32, 2 },
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i32, 2 },
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i64, 2 },
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 2 },
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i64, 2 },
+ { ISD::TRUNCATE, MVT::v16i1, MVT::v16i32, 2 },
+ { ISD::TRUNCATE, MVT::v16i1, MVT::v8i64, 2 },
+
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i64, 1 },
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i64, 1 },
@@ -1786,40 +1830,94 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
static const TypeConversionCostTblEntry AVX512BWVLConversionTbl[] {
// Mask sign extend has an instruction.
- { ISD::SIGN_EXTEND, MVT::v2i8, MVT::v2i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v2i16, MVT::v2i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v4i8, MVT::v4i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v8i8, MVT::v8i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v32i8, MVT::v32i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v2i8, MVT::v2i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v2i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v2i16, MVT::v2i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v2i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v4i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i8, MVT::v4i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v4i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i8, MVT::v8i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v8i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v32i8, MVT::v32i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v32i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v32i8, MVT::v64i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v64i1, 1 },
// Mask zero extend is a sext + shift.
- { ISD::ZERO_EXTEND, MVT::v2i8, MVT::v2i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v2i16, MVT::v2i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v4i8, MVT::v4i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v8i8, MVT::v8i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v32i8, MVT::v32i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v2i8, MVT::v2i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v2i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v2i16, MVT::v2i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v2i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i8, MVT::v4i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v4i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v4i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i8, MVT::v8i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v8i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v32i8, MVT::v32i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v32i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v32i8, MVT::v64i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v64i1, 2 },
+
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 2 },
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v16i8, 2 },
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 2 },
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v8i16, 2 },
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 2 },
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v16i8, 2 },
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 2 },
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v8i16, 2 },
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 2 },
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v16i8, 2 },
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 2 },
+ { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 2 },
+ { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 2 },
+ { ISD::TRUNCATE, MVT::v32i1, MVT::v32i8, 2 },
+ { ISD::TRUNCATE, MVT::v32i1, MVT::v16i16, 2 },
+ { ISD::TRUNCATE, MVT::v64i1, MVT::v32i8, 2 },
+ { ISD::TRUNCATE, MVT::v64i1, MVT::v16i16, 2 },
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 },
- { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 2 }, // vpsllw+vptestmb
- { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 2 }, // vpsllw+vptestmw
- { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 2 }, // vpsllw+vptestmb
- { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 2 }, // vpsllw+vptestmw
- { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 2 }, // vpsllw+vptestmb
- { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 2 }, // vpsllw+vptestmw
- { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 2 }, // vpsllw+vptestmb
- { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 2 }, // vpsllw+vptestmw
- { ISD::TRUNCATE, MVT::v32i1, MVT::v32i8, 2 }, // vpsllw+vptestmb
};
static const TypeConversionCostTblEntry AVX512DQVLConversionTbl[] = {
+ // Mask sign extend has an instruction.
+ { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v2i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v16i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v8i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v16i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 1 },
+
+ // Mask zero extend is a sext + shift.
+ { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v2i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v16i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v8i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v16i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 2 },
+
+ { ISD::TRUNCATE, MVT::v16i1, MVT::v4i64, 2 },
+ { ISD::TRUNCATE, MVT::v16i1, MVT::v8i32, 2 },
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i64, 2 },
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v4i32, 2 },
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i32, 2 },
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i64, 2 },
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v4i64, 2 },
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 2 },
+
{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 1 },
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i64, 1 },
@@ -3674,6 +3772,10 @@ X86TTIImpl::getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
PromEltTyBits = 16; // promote to i16, AVX512BW.
break;
}
+ if (ST->hasDQI()) {
+ PromEltTyBits = 32; // promote to i32, AVX512F.
+ break;
+ }
return bailout();
default:
return bailout();
@@ -3969,7 +4071,9 @@ InstructionCost X86TTIImpl::getAddressComputationCost(Type *Ty,
// Even in the case of (loop invariant) stride whose value is not known at
// compile time, the address computation will not incur more than one extra
// ADD instruction.
- if (Ty->isVectorTy() && SE) {
+ if (Ty->isVectorTy() && SE && !ST->hasAVX2()) {
+ // TODO: AVX2 is the current cut-off because we don't have correct
+ // interleaving costs for prior ISA's.
if (!BaseT::isStridedAccess(Ptr))
return NumVectorInstToHideOverhead;
if (!BaseT::getConstantStrideStep(SE, Ptr))
@@ -5173,7 +5277,8 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
auto *SingleMemOpTy = FixedVectorType::get(VecTy->getElementType(),
LegalVT.getVectorNumElements());
InstructionCost MemOpCost;
- if (UseMaskForCond || UseMaskForGaps)
+ bool UseMaskedMemOp = UseMaskForCond || UseMaskForGaps;
+ if (UseMaskedMemOp)
MemOpCost = getMaskedMemoryOpCost(Opcode, SingleMemOpTy, Alignment,
AddressSpace, CostKind);
else
@@ -5183,9 +5288,8 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
unsigned VF = VecTy->getNumElements() / Factor;
MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF);
- // FIXME: this is the most conservative estimate for the mask cost.
InstructionCost MaskCost;
- if (UseMaskForCond || UseMaskForGaps) {
+ if (UseMaskedMemOp) {
APInt DemandedLoadStoreElts = APInt::getZero(VecTy->getNumElements());
for (unsigned Index : Indices) {
assert(Index < Factor && "Invalid index for interleaved memory op");
@@ -5193,10 +5297,10 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
DemandedLoadStoreElts.setBit(Index + Elm * Factor);
}
- Type *I8Type = Type::getInt8Ty(VecTy->getContext());
+ Type *I1Type = Type::getInt1Ty(VecTy->getContext());
MaskCost = getReplicationShuffleCost(
- I8Type, Factor, VF,
+ I1Type, Factor, VF,
UseMaskForGaps ? DemandedLoadStoreElts
: APInt::getAllOnes(VecTy->getNumElements()),
CostKind);
@@ -5207,7 +5311,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
// memory access, we need to account for the cost of And-ing the two masks
// inside the loop.
if (UseMaskForGaps) {
- auto *MaskVT = FixedVectorType::get(I8Type, VecTy->getNumElements());
+ auto *MaskVT = FixedVectorType::get(I1Type, VecTy->getNumElements());
MaskCost += getArithmeticInstrCost(BinaryOperator::And, MaskVT, CostKind);
}
}
@@ -5248,9 +5352,10 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
NumOfLoadsInInterleaveGrp;
// About a half of the loads may be folded in shuffles when we have only
- // one result. If we have more than one result, we do not fold loads at all.
+ // one result. If we have more than one result, or the loads are masked,
+ // we do not fold loads at all.
unsigned NumOfUnfoldedLoads =
- NumOfResults > 1 ? NumOfMemOps : NumOfMemOps / 2;
+ UseMaskedMemOp || NumOfResults > 1 ? NumOfMemOps : NumOfMemOps / 2;
// Get a number of shuffle operations per result.
unsigned NumOfShufflesPerResult =
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
index 27ac6a4d1439..f2f89f4269ed 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -427,19 +427,19 @@ bool XCoreFrameLowering::spillCalleeSavedRegisters(
if (MI != MBB.end() && !MI->isDebugInstr())
DL = MI->getDebugLoc();
- for (auto it = CSI.begin(); it != CSI.end(); ++it) {
- unsigned Reg = it->getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
assert(Reg != XCore::LR && !(Reg == XCore::R10 && hasFP(*MF)) &&
"LR & FP are always handled in emitPrologue");
// Add the callee-saved register as live-in. It's killed at the spill.
MBB.addLiveIn(Reg);
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.storeRegToStackSlot(MBB, MI, Reg, true, it->getFrameIdx(), RC, TRI);
+ TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, TRI);
if (emitFrameMoves) {
auto Store = MI;
--Store;
- XFI->getSpillLabels().push_back(std::make_pair(Store, *it));
+ XFI->getSpillLabels().push_back(std::make_pair(Store, I));
}
}
return true;
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
index b5dbdea98eea..71836133fae6 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
@@ -48,9 +48,7 @@ bool XCoreFTAOElim::runOnMachineFunction(MachineFunction &MF) {
const XCoreInstrInfo &TII =
*static_cast<const XCoreInstrInfo *>(MF.getSubtarget().getInstrInfo());
unsigned StackSize = MF.getFrameInfo().getStackSize();
- for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
- ++MFI) {
- MachineBasicBlock &MBB = *MFI;
+ for (MachineBasicBlock &MBB : MF) {
for (MachineBasicBlock::iterator MBBI = MBB.begin(), EE = MBB.end();
MBBI != EE; ++MBBI) {
if (MBBI->getOpcode() == XCore::FRAME_TO_ARGS_OFFSET) {
diff --git a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreMCInstLower.cpp b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreMCInstLower.cpp
index cd28fa5cd144..6f5dcb291e6e 100644
--- a/contrib/llvm-project/llvm/lib/Target/XCore/XCoreMCInstLower.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/XCore/XCoreMCInstLower.cpp
@@ -103,8 +103,7 @@ MCOperand XCoreMCInstLower::LowerOperand(const MachineOperand &MO,
void XCoreMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
MCOperand MCOp = LowerOperand(MO);
if (MCOp.isValid())
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index b2c2efed7db8..ba7589c2bf60 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -25,6 +25,7 @@
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -275,94 +276,64 @@ CleanupPointerRootUsers(GlobalVariable *GV,
/// We just marked GV constant. Loop over all users of the global, cleaning up
/// the obvious ones. This is largely just a quick scan over the use list to
/// clean up the easy and obvious cruft. This returns true if it made a change.
-static bool CleanupConstantGlobalUsers(
- Value *V, Constant *Init, const DataLayout &DL,
- function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
+static bool CleanupConstantGlobalUsers(GlobalVariable *GV,
+ const DataLayout &DL) {
+ Constant *Init = GV->getInitializer();
+ SmallVector<User *, 8> WorkList(GV->users());
+ SmallPtrSet<User *, 8> Visited;
bool Changed = false;
- // Note that we need to use a weak value handle for the worklist items. When
- // we delete a constant array, we may also be holding pointer to one of its
- // elements (or an element of one of its elements if we're dealing with an
- // array of arrays) in the worklist.
- SmallVector<WeakTrackingVH, 8> WorkList(V->users());
+
+ SmallVector<WeakTrackingVH> MaybeDeadInsts;
+ auto EraseFromParent = [&](Instruction *I) {
+ for (Value *Op : I->operands())
+ if (auto *OpI = dyn_cast<Instruction>(Op))
+ MaybeDeadInsts.push_back(OpI);
+ I->eraseFromParent();
+ Changed = true;
+ };
while (!WorkList.empty()) {
- Value *UV = WorkList.pop_back_val();
- if (!UV)
+ User *U = WorkList.pop_back_val();
+ if (!Visited.insert(U).second)
continue;
- User *U = cast<User>(UV);
+ if (auto *BO = dyn_cast<BitCastOperator>(U))
+ append_range(WorkList, BO->users());
+ if (auto *ASC = dyn_cast<AddrSpaceCastOperator>(U))
+ append_range(WorkList, ASC->users());
+ else if (auto *GEP = dyn_cast<GEPOperator>(U))
+ append_range(WorkList, GEP->users());
+ else if (auto *LI = dyn_cast<LoadInst>(U)) {
+ // A load from zeroinitializer is always zeroinitializer, regardless of
+ // any applied offset.
+ if (Init->isNullValue()) {
+ LI->replaceAllUsesWith(Constant::getNullValue(LI->getType()));
+ EraseFromParent(LI);
+ continue;
+ }
- if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
- if (Init) {
- if (auto *Casted =
- ConstantFoldLoadThroughBitcast(Init, LI->getType(), DL)) {
- // Replace the load with the initializer.
- LI->replaceAllUsesWith(Casted);
- LI->eraseFromParent();
- Changed = true;
+ Value *PtrOp = LI->getPointerOperand();
+ APInt Offset(DL.getIndexTypeSizeInBits(PtrOp->getType()), 0);
+ PtrOp = PtrOp->stripAndAccumulateConstantOffsets(
+ DL, Offset, /* AllowNonInbounds */ true);
+ if (PtrOp == GV) {
+ if (auto *Value = ConstantFoldLoadFromConst(Init, LI->getType(),
+ Offset, DL)) {
+ LI->replaceAllUsesWith(Value);
+ EraseFromParent(LI);
}
}
} else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
// Store must be unreachable or storing Init into the global.
- SI->eraseFromParent();
- Changed = true;
- } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
- if (CE->getOpcode() == Instruction::GetElementPtr) {
- Constant *SubInit = nullptr;
- if (Init)
- SubInit = ConstantFoldLoadThroughGEPConstantExpr(
- Init, CE, V->getType()->getPointerElementType(), DL);
- Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, GetTLI);
- } else if ((CE->getOpcode() == Instruction::BitCast &&
- CE->getType()->isPointerTy()) ||
- CE->getOpcode() == Instruction::AddrSpaceCast) {
- // Pointer cast, delete any stores and memsets to the global.
- Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, GetTLI);
- }
-
- if (CE->use_empty()) {
- CE->destroyConstant();
- Changed = true;
- }
- } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
- // Do not transform "gepinst (gep constexpr (GV))" here, because forming
- // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold
- // and will invalidate our notion of what Init is.
- Constant *SubInit = nullptr;
- if (!isa<ConstantExpr>(GEP->getOperand(0))) {
- ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(
- ConstantFoldInstruction(GEP, DL, &GetTLI(*GEP->getFunction())));
- if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
- SubInit = ConstantFoldLoadThroughGEPConstantExpr(
- Init, CE, V->getType()->getPointerElementType(), DL);
-
- // If the initializer is an all-null value and we have an inbounds GEP,
- // we already know what the result of any load from that GEP is.
- // TODO: Handle splats.
- if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds())
- SubInit = Constant::getNullValue(GEP->getResultElementType());
- }
- Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, GetTLI);
-
- if (GEP->use_empty()) {
- GEP->eraseFromParent();
- Changed = true;
- }
+ EraseFromParent(SI);
} else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv
- if (MI->getRawDest() == V) {
- MI->eraseFromParent();
- Changed = true;
- }
-
- } else if (Constant *C = dyn_cast<Constant>(U)) {
- // If we have a chain of dead constantexprs or other things dangling from
- // us, and if they are all dead, nuke them without remorse.
- if (isSafeToDestroyConstant(C)) {
- C->destroyConstant();
- CleanupConstantGlobalUsers(V, Init, DL, GetTLI);
- return true;
- }
+ if (getUnderlyingObject(MI->getRawDest()) == GV)
+ EraseFromParent(MI);
}
}
+
+ Changed |=
+ RecursivelyDeleteTriviallyDeadInstructionsPermissive(MaybeDeadInsts);
+ GV->removeDeadConstantUsers();
return Changed;
}
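
The rewritten CleanupConstantGlobalUsers walks the global's users with an explicit worklist and a visited set, re-queuing the users of look-through operators (bitcast, addrspacecast, GEP) and simplifying loads, stores and mem-intrinsics as they are reached. The traversal skeleton, reduced to a toy user graph so it runs standalone (node kinds and names are invented, not LLVM classes):

#include <cstdio>
#include <string>
#include <unordered_set>
#include <vector>

enum class Kind { LookThrough, Load, Store };

struct Node {
  std::string Name;
  Kind K;
  std::vector<Node *> Users; // only meaningful for look-through nodes
};

int main() {
  Node Load{"load", Kind::Load, {}};
  Node Store{"store", Kind::Store, {}};
  Node Gep{"gep", Kind::LookThrough, {&Load, &Store}};
  std::vector<Node *> WorkList{&Gep, &Load}; // direct users of the global

  std::unordered_set<Node *> Visited;
  while (!WorkList.empty()) {
    Node *U = WorkList.back();
    WorkList.pop_back();
    if (!Visited.insert(U).second)           // each user handled at most once
      continue;
    if (U->K == Kind::LookThrough)           // bitcast / addrspacecast / GEP
      WorkList.insert(WorkList.end(), U->Users.begin(), U->Users.end());
    else
      std::printf("simplify %s\n", U->Name.c_str());
  }
  return 0;
}
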
@@ -889,7 +860,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(
Changed |= CleanupPointerRootUsers(GV, GetTLI);
} else {
Changed = true;
- CleanupConstantGlobalUsers(GV, nullptr, DL, GetTLI);
+ CleanupConstantGlobalUsers(GV, DL);
}
if (GV->use_empty()) {
LLVM_DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n");
@@ -1557,8 +1528,7 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
} else {
// Delete any stores we can find to the global. We may not be able to
// make it completely dead though.
- Changed =
- CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
+ Changed = CleanupConstantGlobalUsers(GV, DL);
}
// If the global is dead now, delete it.
@@ -1583,7 +1553,7 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
}
// Clean up any obviously simplifiable users now.
- Changed |= CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
+ Changed |= CleanupConstantGlobalUsers(GV, DL);
// If the global is dead now, just nuke it.
if (GV->use_empty()) {
@@ -1628,7 +1598,7 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
GV->setInitializer(SOVConstant);
// Clean up any obviously simplifiable users now.
- CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
+ CleanupConstantGlobalUsers(GV, DL);
if (GV->use_empty()) {
LLVM_DEBUG(dbgs() << " *** Substituting initializer allowed us to "
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index f342c35fa283..055ee6b50296 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -1885,6 +1885,7 @@ private:
OMPRTL___kmpc_barrier_simple_generic);
ExternalizationRAII ThreadId(OMPInfoCache,
OMPRTL___kmpc_get_hardware_thread_id_in_block);
+ ExternalizationRAII WarpSize(OMPInfoCache, OMPRTL___kmpc_get_warp_size);
registerAAs(IsModulePass);
@@ -3727,12 +3728,37 @@ struct AAKernelInfoFunction : AAKernelInfo {
CheckRWInst, *this, UsedAssumedInformationInCheckRWInst))
SPMDCompatibilityTracker.indicatePessimisticFixpoint();
+ bool UsedAssumedInformationFromReachingKernels = false;
if (!IsKernelEntry) {
- updateReachingKernelEntries(A);
updateParallelLevels(A);
+ bool AllReachingKernelsKnown = true;
+ updateReachingKernelEntries(A, AllReachingKernelsKnown);
+ UsedAssumedInformationFromReachingKernels = !AllReachingKernelsKnown;
+
if (!ParallelLevels.isValidState())
SPMDCompatibilityTracker.indicatePessimisticFixpoint();
+ else if (!ReachingKernelEntries.isValidState())
+ SPMDCompatibilityTracker.indicatePessimisticFixpoint();
+ else if (!SPMDCompatibilityTracker.empty()) {
+ // Check if all reaching kernels agree on the mode as we can otherwise
+ // not guard instructions. We might not be sure about the mode so we
+ // cannot fix the internal spmd-zation state either.
+ int SPMD = 0, Generic = 0;
+ for (auto *Kernel : ReachingKernelEntries) {
+ auto &CBAA = A.getAAFor<AAKernelInfo>(
+ *this, IRPosition::function(*Kernel), DepClassTy::OPTIONAL);
+ if (CBAA.SPMDCompatibilityTracker.isValidState() &&
+ CBAA.SPMDCompatibilityTracker.isAssumed())
+ ++SPMD;
+ else
+ ++Generic;
+ if (!CBAA.SPMDCompatibilityTracker.isAtFixpoint())
+ UsedAssumedInformationFromReachingKernels = true;
+ }
+ if (SPMD != 0 && Generic != 0)
+ SPMDCompatibilityTracker.indicatePessimisticFixpoint();
+ }
}
// Callback to check a call instruction.
@@ -3779,7 +3805,8 @@ struct AAKernelInfoFunction : AAKernelInfo {
// If we haven't used any assumed information for the SPMD state we can fix
// it.
if (!UsedAssumedInformationInCheckRWInst &&
- !UsedAssumedInformationInCheckCallInst && AllSPMDStatesWereFixed)
+ !UsedAssumedInformationInCheckCallInst &&
+ !UsedAssumedInformationFromReachingKernels && AllSPMDStatesWereFixed)
SPMDCompatibilityTracker.indicateOptimisticFixpoint();
return StateBefore == getState() ? ChangeStatus::UNCHANGED
@@ -3788,7 +3815,8 @@ struct AAKernelInfoFunction : AAKernelInfo {
private:
/// Update info regarding reaching kernels.
- void updateReachingKernelEntries(Attributor &A) {
+ void updateReachingKernelEntries(Attributor &A,
+ bool &AllReachingKernelsKnown) {
auto PredCallSite = [&](AbstractCallSite ACS) {
Function *Caller = ACS.getInstruction()->getFunction();
@@ -3808,10 +3836,9 @@ private:
return true;
};
- bool AllCallSitesKnown;
if (!A.checkForAllCallSites(PredCallSite, *this,
true /* RequireAllCallSites */,
- AllCallSitesKnown))
+ AllReachingKernelsKnown))
ReachingKernelEntries.indicatePessimisticFixpoint();
}
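
The added reaching-kernel logic keeps the SPMD-compatibility state only when every reaching kernel is known and all of them agree on one execution mode; a mix of SPMD and generic callers forces the pessimistic fixpoint because instructions cannot be guarded both ways at once. Reduced to its counting core (the enum and the decision below are illustrative, not the Attributor API):

#include <cstdio>
#include <vector>

enum class Mode { SPMD, Generic };

int main() {
  std::vector<Mode> ReachingKernels{Mode::SPMD, Mode::SPMD, Mode::Generic};

  int SPMD = 0, Generic = 0;
  for (Mode M : ReachingKernels)
    (M == Mode::SPMD ? SPMD : Generic)++;

  // Mixed modes: guarding instructions differently per caller is impossible,
  // so give up (the pessimistic fixpoint in the real code).
  if (SPMD != 0 && Generic != 0)
    std::puts("indicatePessimisticFixpoint()");
  else
    std::puts("all reaching kernels agree");
  return 0;
}
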
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/PartialInlining.cpp
index 7402e399a88a..2d717475ce7f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -641,8 +641,7 @@ PartialInlinerImpl::computeOutliningInfo(Function &F) const {
if (!CandidateFound)
return std::unique_ptr<FunctionOutliningInfo>();
- // Do sanity check of the entries: threre should not
- // be any successors (not in the entry set) other than
+ // There should not be any successors (not in the entry set) other than
// {ReturnBlock, NonReturnBlock}
assert(OutliningInfo->Entries[0] == &F.front() &&
"Function Entry must be the first in Entries vector");
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp
index a961c47a7501..b8fac9d47763 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -84,6 +84,7 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/SampleProfileInference.h"
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h"
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
#include <algorithm>
@@ -173,6 +174,9 @@ static cl::opt<bool>
cl::desc("Process functions in a top-down order "
"defined by the profiled call graph when "
"-sample-profile-top-down-load is on."));
+cl::opt<bool>
+ SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden,
+ cl::desc("Sort profiled recursion by edge weights."));
static cl::opt<bool> ProfileSizeInline(
"sample-profile-inline-size", cl::Hidden, cl::init(false),
@@ -1648,6 +1652,19 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) {
SmallVector<uint32_t, 4> Weights;
uint32_t MaxWeight = 0;
Instruction *MaxDestInst;
+ // Since profi treats multiple edges (multiway branches) as a single edge,
+ // we need to distribute the computed weight among the branches. We do
+ // this by evenly splitting the edge weight among destinations.
+ DenseMap<const BasicBlock *, uint64_t> EdgeMultiplicity;
+ std::vector<uint64_t> EdgeIndex;
+ if (SampleProfileUseProfi) {
+ EdgeIndex.resize(TI->getNumSuccessors());
+ for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
+ const BasicBlock *Succ = TI->getSuccessor(I);
+ EdgeIndex[I] = EdgeMultiplicity[Succ];
+ EdgeMultiplicity[Succ]++;
+ }
+ }
for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
BasicBlock *Succ = TI->getSuccessor(I);
Edge E = std::make_pair(BB, Succ);
@@ -1660,9 +1677,19 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) {
LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
Weight = std::numeric_limits<uint32_t>::max();
}
- // Weight is added by one to avoid propagation errors introduced by
- // 0 weights.
- Weights.push_back(static_cast<uint32_t>(Weight + 1));
+ if (!SampleProfileUseProfi) {
+ // Weight is added by one to avoid propagation errors introduced by
+ // 0 weights.
+ Weights.push_back(static_cast<uint32_t>(Weight + 1));
+ } else {
+ // Profi creates proper weights that do not require "+1" adjustments but
+ // we evenly split the weight among branches with the same destination.
+ uint64_t W = Weight / EdgeMultiplicity[Succ];
+ // Rounding up, if needed, so that first branches are hotter.
+ if (EdgeIndex[I] < Weight % EdgeMultiplicity[Succ])
+ W++;
+ Weights.push_back(static_cast<uint32_t>(W));
+ }
if (Weight != 0) {
if (Weight > MaxWeight) {
MaxWeight = Weight;
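
With profi enabled, one computed edge weight may have to cover several terminator successors that are the same block (for example a switch with multiple cases to one destination). The weight is split evenly, with the remainder rounded into the earlier branches so the parts still sum to the original. Worked through with made-up numbers:

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  uint64_t Weight = 10;       // profi's weight for the (BB, Succ) edge
  uint64_t Multiplicity = 3;  // three branches of the terminator hit Succ

  std::vector<uint64_t> PerBranch;
  uint64_t Sum = 0;
  for (uint64_t EdgeIndex = 0; EdgeIndex < Multiplicity; ++EdgeIndex) {
    uint64_t W = Weight / Multiplicity;
    if (EdgeIndex < Weight % Multiplicity) // earlier branches get the remainder
      ++W;
    PerBranch.push_back(W);
    Sum += W;
  }

  // 10 split three ways -> 4, 3, 3; nothing is lost to rounding.
  assert(Sum == Weight);
  for (uint64_t W : PerBranch)
    std::printf("%llu ", static_cast<unsigned long long>(W));
  std::printf("\n");
  return 0;
}
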
@@ -1853,7 +1880,13 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(*CG);
scc_iterator<ProfiledCallGraph *> CGI = scc_begin(ProfiledCG.get());
while (!CGI.isAtEnd()) {
- for (ProfiledCallGraphNode *Node : *CGI) {
+ auto Range = *CGI;
+ if (SortProfiledSCC) {
+ // Sort nodes in one SCC based on callsite hotness.
+ scc_member_iterator<ProfiledCallGraph *> SI(*CGI);
+ Range = *SI;
+ }
+ for (auto *Node : Range) {
Function *F = SymbolMap.lookup(Node->Name);
if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile"))
FunctionOrderList.push_back(F);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 06c9bf650f37..dc55b5a31596 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1727,16 +1727,18 @@ static Instruction *foldComplexAndOrPatterns(BinaryOperator &I,
(Opcode == Instruction::And) ? Instruction::Or : Instruction::And;
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- Value *A, *B, *C;
+ Value *A, *B, *C, *X, *Y;
// (~(A | B) & C) | ... --> ...
// (~(A & B) | C) & ... --> ...
// TODO: One use checks are conservative. We just need to check that a total
// number of multiple used values does not exceed reduction
// in operations.
- if (match(Op0, m_c_BinOp(FlippedOpcode,
- m_Not(m_BinOp(Opcode, m_Value(A), m_Value(B))),
- m_Value(C)))) {
+ if (match(Op0,
+ m_c_BinOp(FlippedOpcode,
+ m_CombineAnd(m_Value(X), m_Not(m_BinOp(Opcode, m_Value(A),
+ m_Value(B)))),
+ m_Value(C)))) {
// (~(A | B) & C) | (~(A | C) & B) --> (B ^ C) & ~A
// (~(A & B) | C) & (~(A & C) | B) --> ~((B ^ C) & A)
if (match(Op1,
@@ -1776,6 +1778,21 @@ static Instruction *foldComplexAndOrPatterns(BinaryOperator &I,
m_c_BinOp(Opcode, m_Specific(B), m_Specific(C)))))))
return BinaryOperator::CreateNot(Builder.CreateBinOp(
Opcode, Builder.CreateBinOp(FlippedOpcode, A, C), B));
+
+ // (~(A | B) & C) | ~(C | (A ^ B)) --> ~((A | B) & (C | (A ^ B)))
+ // Note, the pattern with swapped and/or is not handled because the
+ // result is more undefined than a source:
+ // (~(A & B) | C) & ~(C & (A ^ B)) --> (A ^ B ^ C) | ~(A | C) is invalid.
+ if (Opcode == Instruction::Or && Op0->hasOneUse() &&
+ match(Op1, m_OneUse(m_Not(m_CombineAnd(
+ m_Value(Y),
+ m_c_BinOp(Opcode, m_Specific(C),
+ m_c_Xor(m_Specific(A), m_Specific(B)))))))) {
+ // X = ~(A | B)
+ // Y = (C | (A ^ B))
+ Value *Or = cast<BinaryOperator>(X)->getOperand(0);
+ return BinaryOperator::CreateNot(Builder.CreateAnd(Or, Y));
+ }
}
return nullptr;
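
A quick standalone check of the bitwise identity behind the new fold (illustrative only, not LLVM code); verifying it per bit is enough because all the operators involved are bitwise:

    #include <cassert>

    int main() {
      // (~(A | B) & C) | ~(C | (A ^ B)) == ~((A | B) & (C | (A ^ B)))
      for (unsigned A = 0; A < 2; ++A)
        for (unsigned B = 0; B < 2; ++B)
          for (unsigned C = 0; C < 2; ++C) {
            unsigned LHS = ((~(A | B) & C) | ~(C | (A ^ B))) & 1;
            unsigned RHS = (~((A | B) & (C | (A ^ B)))) & 1;
            assert(LHS == RHS);
          }
      return 0;
    }
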
@@ -2061,7 +2078,14 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
if (Instruction *CastedAnd = foldCastedBitwiseLogic(I))
return CastedAnd;
+ if (Instruction *Sel = foldBinopOfSextBoolToSelect(I))
+ return Sel;
+
// and(sext(A), B) / and(B, sext(A)) --> A ? B : 0, where A is i1 or <N x i1>.
+ // TODO: Move this into foldBinopOfSextBoolToSelect as a more generalized fold
+ // with binop identity constant. But creating a select with non-constant
+ // arm may not be reversible due to poison semantics. Is that a good
+ // canonicalization?
Value *A;
if (match(Op0, m_OneUse(m_SExt(m_Value(A)))) &&
A->getType()->isIntOrIntVectorTy(1))
@@ -2322,11 +2346,20 @@ Value *InstCombinerImpl::getSelectCondition(Value *A, Value *B) {
Value *Cond;
Value *NotB;
if (match(A, m_SExt(m_Value(Cond))) &&
- Cond->getType()->isIntOrIntVectorTy(1) &&
- match(B, m_OneUse(m_Not(m_Value(NotB))))) {
- NotB = peekThroughBitcast(NotB, true);
- if (match(NotB, m_SExt(m_Specific(Cond))))
+ Cond->getType()->isIntOrIntVectorTy(1)) {
+ // A = sext i1 Cond; B = sext (not (i1 Cond))
+ if (match(B, m_SExt(m_Not(m_Specific(Cond)))))
return Cond;
+
+ // A = sext i1 Cond; B = not ({bitcast} (sext (i1 Cond)))
+ // TODO: The one-use checks are unnecessary or misplaced. If the caller
+ // checked for uses on logic ops/casts, that should be enough to
+ // make this transform worthwhile.
+ if (match(B, m_OneUse(m_Not(m_Value(NotB))))) {
+ NotB = peekThroughBitcast(NotB, true);
+ if (match(NotB, m_SExt(m_Specific(Cond))))
+ return Cond;
+ }
}
// All scalar (and most vector) possibilities should be handled now.
@@ -2569,7 +2602,8 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
return replaceInstUsesWith(I, V);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (I.getType()->isIntOrIntVectorTy(1)) {
+ Type *Ty = I.getType();
+ if (Ty->isIntOrIntVectorTy(1)) {
if (auto *SI0 = dyn_cast<SelectInst>(Op0)) {
if (auto *I =
foldAndOrOfSelectUsingImpliedCond(Op1, *SI0, /* IsAnd */ false))
@@ -2602,7 +2636,16 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
// (X ^ C) | Y -> (X | Y) ^ C iff Y & C == 0
// The check for a 'not' op is for efficiency (if Y is known zero --> ~X).
Value *Or = Builder.CreateOr(X, Y);
- return BinaryOperator::CreateXor(Or, ConstantInt::get(I.getType(), *CV));
+ return BinaryOperator::CreateXor(Or, ConstantInt::get(Ty, *CV));
+ }
+
+ // If the operands have no common bits set:
+ // or (mul X, Y), X --> add (mul X, Y), X --> mul X, (Y + 1)
+ if (match(&I,
+ m_c_Or(m_OneUse(m_Mul(m_Value(X), m_Value(Y))), m_Deferred(X))) &&
+ haveNoCommonBitsSet(Op0, Op1, DL)) {
+ Value *IncrementY = Builder.CreateAdd(Y, ConstantInt::get(Ty, 1));
+ return BinaryOperator::CreateMul(X, IncrementY);
}
// (A & C) | (B & D)
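A standalone spot-check of the mul/or fold just above: when the operands share no set bits, or behaves like add, and adding X to X * Y is X * (Y + 1). The 8-bit exhaustive test below is illustrative only:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned X = 0; X < 256; ++X)
        for (unsigned Y = 0; Y < 256; ++Y) {
          uint8_t Mul = uint8_t(X * Y);
          if ((Mul & X) != 0)
            continue; // precondition: no common bits set (haveNoCommonBitsSet)
          // Disjoint bits: or is add, and (X * Y) + X == X * (Y + 1) mod 2^8.
          assert(uint8_t(Mul | X) == uint8_t(X * (Y + 1)));
        }
      return 0;
    }
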
@@ -2635,14 +2678,14 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
// iff (C0 & C1) == 0 and (X & ~C0) == 0
if (match(A, m_c_Or(m_Value(X), m_Specific(B))) &&
MaskedValueIsZero(X, ~*C0, 0, &I)) {
- Constant *C01 = ConstantInt::get(I.getType(), *C0 | *C1);
+ Constant *C01 = ConstantInt::get(Ty, *C0 | *C1);
return BinaryOperator::CreateAnd(A, C01);
}
// (A & C0) | ((X | A) & C1) --> (X | A) & (C0 | C1)
// iff (C0 & C1) == 0 and (X & ~C1) == 0
if (match(B, m_c_Or(m_Value(X), m_Specific(A))) &&
MaskedValueIsZero(X, ~*C1, 0, &I)) {
- Constant *C01 = ConstantInt::get(I.getType(), *C0 | *C1);
+ Constant *C01 = ConstantInt::get(Ty, *C0 | *C1);
return BinaryOperator::CreateAnd(B, C01);
}
// ((X | C2) & C0) | ((X | C3) & C1) --> (X | C2 | C3) & (C0 | C1)
@@ -2652,7 +2695,7 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
match(B, m_Or(m_Specific(X), m_APInt(C3))) &&
(*C2 & ~*C0).isZero() && (*C3 & ~*C1).isZero()) {
Value *Or = Builder.CreateOr(X, *C2 | *C3, "bitfield");
- Constant *C01 = ConstantInt::get(I.getType(), *C0 | *C1);
+ Constant *C01 = ConstantInt::get(Ty, *C0 | *C1);
return BinaryOperator::CreateAnd(Or, C01);
}
}
@@ -2788,13 +2831,20 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
if (Instruction *CastedOr = foldCastedBitwiseLogic(I))
return CastedOr;
+ if (Instruction *Sel = foldBinopOfSextBoolToSelect(I))
+ return Sel;
+
// or(sext(A), B) / or(B, sext(A)) --> A ? -1 : B, where A is i1 or <N x i1>.
+ // TODO: Move this into foldBinopOfSextBoolToSelect as a more generalized fold
+ // with binop identity constant. But creating a select with non-constant
+ // arm may not be reversible due to poison semantics. Is that a good
+ // canonicalization?
if (match(Op0, m_OneUse(m_SExt(m_Value(A)))) &&
A->getType()->isIntOrIntVectorTy(1))
- return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op1);
+ return SelectInst::Create(A, ConstantInt::getAllOnesValue(Ty), Op1);
if (match(Op1, m_OneUse(m_SExt(m_Value(A)))) &&
A->getType()->isIntOrIntVectorTy(1))
- return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op0);
+ return SelectInst::Create(A, ConstantInt::getAllOnesValue(Ty), Op0);
// Note: If we've gotten to the point of visiting the outer OR, then the
// inner one couldn't be simplified. If it was a constant, then it won't
@@ -2826,7 +2876,6 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
// or(ashr(subNSW(Y, X), ScalarSizeInBits(Y) - 1), X) --> X s> Y ? -1 : X.
{
Value *X, *Y;
- Type *Ty = I.getType();
if (match(&I, m_c_Or(m_OneUse(m_AShr(
m_NSWSub(m_Value(Y), m_Value(X)),
m_SpecificInt(Ty->getScalarSizeInBits() - 1))),
@@ -2876,7 +2925,6 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
if (match(&I, m_c_Or(m_Add(m_Shl(m_One(), m_Value(X)), m_AllOnes()),
m_Shl(m_One(), m_Deferred(X)))) &&
match(&I, m_c_Or(m_OneUse(m_Value()), m_Value()))) {
- Type *Ty = X->getType();
Value *Sub = Builder.CreateSub(
ConstantInt::get(Ty, Ty->getScalarSizeInBits() - 1), X);
return BinaryOperator::CreateLShr(Constant::getAllOnesValue(Ty), Sub);
@@ -3601,6 +3649,14 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
if (match(&I, m_c_Xor(m_c_And(m_Not(m_Value(A)), m_Value(B)), m_Deferred(A))))
return BinaryOperator::CreateOr(A, B);
+ // (~A | B) ^ A --> ~(A & B)
+ if (match(Op0, m_OneUse(m_c_Or(m_Not(m_Specific(Op1)), m_Value(B)))))
+ return BinaryOperator::CreateNot(Builder.CreateAnd(Op1, B));
+
+ // A ^ (~A | B) --> ~(A & B)
+ if (match(Op1, m_OneUse(m_c_Or(m_Not(m_Specific(Op0)), m_Value(B)))))
+ return BinaryOperator::CreateNot(Builder.CreateAnd(Op0, B));
+
// (A | B) ^ (A | C) --> (B ^ C) & ~A -- There are 4 commuted variants.
// TODO: Loosen one-use restriction if common operand is a constant.
Value *D;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index bfa7bfa2290a..7da2669e1d13 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2641,7 +2641,7 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
ArgNo++;
}
- assert(ArgNo == Call.arg_size() && "sanity check");
+ assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly.");
if (!ArgNos.empty()) {
AttributeList AS = Call.getAttributes();
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index ca87477c5d81..33f217659c01 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -2771,7 +2771,7 @@ Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
if (match(Src, m_OneUse(m_InsertElt(m_OneUse(m_BitCast(m_Value(X))),
m_Value(Y), m_ConstantInt(IndexC)))) &&
DestTy->isIntegerTy() && X->getType() == DestTy &&
- isDesirableIntType(BitWidth)) {
+ Y->getType()->isIntegerTy() && isDesirableIntType(BitWidth)) {
// Adjust for big endian - the LSBs are at the high index.
if (DL.isBigEndian())
IndexC = SrcVTy->getNumElements() - 1 - IndexC;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 7a9e177f19da..ed53b88aed61 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -14,6 +14,7 @@
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -1894,23 +1895,6 @@ Instruction *InstCombinerImpl::foldICmpAndConstant(ICmpInst &Cmp,
return new ICmpInst(NewPred, X, SubOne(cast<Constant>(Cmp.getOperand(1))));
}
- // (X & C2) == 0 -> (trunc X) >= 0
- // (X & C2) != 0 -> (trunc X) < 0
- // iff C2 is a power of 2 and it masks the sign bit of a legal integer type.
- const APInt *C2;
- if (And->hasOneUse() && C.isZero() && match(Y, m_APInt(C2))) {
- int32_t ExactLogBase2 = C2->exactLogBase2();
- if (ExactLogBase2 != -1 && DL.isLegalInteger(ExactLogBase2 + 1)) {
- Type *NTy = IntegerType::get(Cmp.getContext(), ExactLogBase2 + 1);
- if (auto *AndVTy = dyn_cast<VectorType>(And->getType()))
- NTy = VectorType::get(NTy, AndVTy->getElementCount());
- Value *Trunc = Builder.CreateTrunc(X, NTy);
- auto NewPred =
- Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_SGE : CmpInst::ICMP_SLT;
- return new ICmpInst(NewPred, Trunc, Constant::getNullValue(NTy));
- }
- }
-
return nullptr;
}
@@ -2803,7 +2787,8 @@ bool InstCombinerImpl::matchThreeWayIntCompare(SelectInst *SI, Value *&LHS,
PredB, cast<Constant>(RHS2));
if (!FlippedStrictness)
return false;
- assert(FlippedStrictness->first == ICmpInst::ICMP_SGE && "Sanity check");
+ assert(FlippedStrictness->first == ICmpInst::ICMP_SGE &&
+ "basic correctness failure");
RHS2 = FlippedStrictness->second;
// And kind-of perform the result swap.
std::swap(Less, Greater);
@@ -4614,7 +4599,7 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
static Instruction *foldICmpWithTrunc(ICmpInst &ICmp,
InstCombiner::BuilderTy &Builder) {
- const ICmpInst::Predicate Pred = ICmp.getPredicate();
+ ICmpInst::Predicate Pred = ICmp.getPredicate();
Value *Op0 = ICmp.getOperand(0), *Op1 = ICmp.getOperand(1);
// Try to canonicalize trunc + compare-to-constant into a mask + cmp.
@@ -4624,41 +4609,31 @@ static Instruction *foldICmpWithTrunc(ICmpInst &ICmp,
if (!match(Op0, m_OneUse(m_Trunc(m_Value(X)))) || !match(Op1, m_APInt(C)))
return nullptr;
+ // This matches patterns corresponding to tests of the signbit as well as:
+ // (trunc X) u< C --> (X & -C) == 0 (are all masked-high-bits clear?)
+ // (trunc X) u> C --> (X & ~C) != 0 (are any masked-high-bits set?)
+ APInt Mask;
+ if (decomposeBitTestICmp(Op0, Op1, Pred, X, Mask, true /* WithTrunc */)) {
+ Value *And = Builder.CreateAnd(X, Mask);
+ Constant *Zero = ConstantInt::getNullValue(X->getType());
+ return new ICmpInst(Pred, And, Zero);
+ }
+
unsigned SrcBits = X->getType()->getScalarSizeInBits();
- if (Pred == ICmpInst::ICMP_ULT) {
- if (C->isPowerOf2()) {
- // If C is a power-of-2 (one set bit):
- // (trunc X) u< C --> (X & -C) == 0 (are all masked-high-bits clear?)
- Constant *MaskC = ConstantInt::get(X->getType(), (-*C).zext(SrcBits));
- Value *And = Builder.CreateAnd(X, MaskC);
- Constant *Zero = ConstantInt::getNullValue(X->getType());
- return new ICmpInst(ICmpInst::ICMP_EQ, And, Zero);
- }
+ if (Pred == ICmpInst::ICMP_ULT && C->isNegatedPowerOf2()) {
// If C is a negative power-of-2 (high-bit mask):
// (trunc X) u< C --> (X & C) != C (are any masked-high-bits clear?)
- if (C->isNegatedPowerOf2()) {
- Constant *MaskC = ConstantInt::get(X->getType(), C->zext(SrcBits));
- Value *And = Builder.CreateAnd(X, MaskC);
- return new ICmpInst(ICmpInst::ICMP_NE, And, MaskC);
- }
+ Constant *MaskC = ConstantInt::get(X->getType(), C->zext(SrcBits));
+ Value *And = Builder.CreateAnd(X, MaskC);
+ return new ICmpInst(ICmpInst::ICMP_NE, And, MaskC);
}
- if (Pred == ICmpInst::ICMP_UGT) {
- // If C is a low-bit-mask (C+1 is a power-of-2):
- // (trunc X) u> C --> (X & ~C) != 0 (are any masked-high-bits set?)
- if (C->isMask()) {
- Constant *MaskC = ConstantInt::get(X->getType(), (~*C).zext(SrcBits));
- Value *And = Builder.CreateAnd(X, MaskC);
- Constant *Zero = ConstantInt::getNullValue(X->getType());
- return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
- }
+ if (Pred == ICmpInst::ICMP_UGT && (~*C).isPowerOf2()) {
// If C is not-of-power-of-2 (one clear bit):
// (trunc X) u> C --> (X & (C+1)) == C+1 (are all masked-high-bits set?)
- if ((~*C).isPowerOf2()) {
- Constant *MaskC = ConstantInt::get(X->getType(), (*C + 1).zext(SrcBits));
- Value *And = Builder.CreateAnd(X, MaskC);
- return new ICmpInst(ICmpInst::ICMP_EQ, And, MaskC);
- }
+ Constant *MaskC = ConstantInt::get(X->getType(), (*C + 1).zext(SrcBits));
+ Value *And = Builder.CreateAnd(X, MaskC);
+ return new ICmpInst(ICmpInst::ICMP_EQ, And, MaskC);
}
return nullptr;
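
The rewritten trunc-compare cases all reduce to mask tests on the wide value; a standalone model at i16 -> i8 with assumed constants 16 and 0x3f (this is a sketch of the arithmetic, not the InstCombine code itself):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned X = 0; X < 0x10000; ++X) {
        uint8_t T = uint8_t(X); // trunc i16 X to i8
        // (trunc X) u< 16   <=>  (X & zext(-16))   == 0, i.e. (X & 0x00f0) == 0.
        assert((T < 16) == ((X & 0x00f0u) == 0));
        // (trunc X) u> 0x3f <=>  (X & zext(~0x3f)) != 0, i.e. (X & 0x00c0) != 0.
        assert((T > 0x3f) == ((X & 0x00c0u) != 0));
      }
      return 0;
    }
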
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 72e1b21e8d49..20c75188ec9f 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -319,6 +319,7 @@ private:
Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
Instruction *foldBitcastExtElt(ExtractElementInst &ExtElt);
Instruction *foldCastedBitwiseLogic(BinaryOperator &I);
+ Instruction *foldBinopOfSextBoolToSelect(BinaryOperator &I);
Instruction *narrowBinOp(TruncInst &Trunc);
Instruction *narrowMaskedBinOp(BinaryOperator &And);
Instruction *narrowMathIfNoOverflow(BinaryOperator &I);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
index 7dc516c6fdc3..42ba4a34a5a9 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
@@ -403,7 +403,7 @@ LLVM_NODISCARD Value *Negator::visitImpl(Value *V, unsigned Depth) {
NonNegatedOps.emplace_back(Op); // Just record which operand that was.
}
assert((NegatedOps.size() + NonNegatedOps.size()) == 2 &&
- "Internal consistency sanity check.");
+ "Internal consistency check failed.");
// Did we manage to sink negation into both of the operands?
if (NegatedOps.size() == 2) // Then we get to keep the `add`!
return Builder.CreateAdd(NegatedOps[0], NegatedOps[1],
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 4a1e82ae9c1d..518d3952dce5 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -246,12 +246,16 @@ static Value *foldSelectICmpAnd(SelectInst &Sel, ICmpInst *Cmp,
static unsigned getSelectFoldableOperands(BinaryOperator *I) {
switch (I->getOpcode()) {
case Instruction::Add:
+ case Instruction::FAdd:
case Instruction::Mul:
+ case Instruction::FMul:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
return 3; // Can fold through either operand.
case Instruction::Sub: // Can only fold on the amount subtracted.
+ case Instruction::FSub:
+ case Instruction::FDiv: // Can only fold on the divisor amount.
case Instruction::Shl: // Can only fold on the shift amount.
case Instruction::LShr:
case Instruction::AShr:
diff --git a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 47b6dcb67a78..1f81624f79e7 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -967,6 +967,29 @@ Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
return nullptr;
}
+/// A binop with a constant operand and a sign-extended boolean operand may be
+/// converted into a select of constants by applying the binary operation to
+/// the constant with the two possible values of the extended boolean (0 or -1).
+Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
+ // TODO: Handle non-commutative binop (constant is operand 0).
+ // TODO: Handle zext.
+ // TODO: Peek through 'not' of cast.
+ Value *BO0 = BO.getOperand(0);
+ Value *BO1 = BO.getOperand(1);
+ Value *X;
+ Constant *C;
+ if (!match(BO0, m_SExt(m_Value(X))) || !match(BO1, m_ImmConstant(C)) ||
+ !X->getType()->isIntOrIntVectorTy(1))
+ return nullptr;
+
+ // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C)
+ Constant *Ones = ConstantInt::getAllOnesValue(BO.getType());
+ Constant *Zero = ConstantInt::getNullValue(BO.getType());
+ Constant *TVal = ConstantExpr::get(BO.getOpcode(), Ones, C);
+ Constant *FVal = ConstantExpr::get(BO.getOpcode(), Zero, C);
+ return SelectInst::Create(X, TVal, FVal);
+}
+
static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO,
InstCombiner::BuilderTy &Builder) {
if (auto *Cast = dyn_cast<CastInst>(&I))
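Conceptually, the new helper turns a binop of a sign-extended bool and a constant into a select between two folded constants; a minimal standalone check of that arithmetic (assumptions: i32 width and an arbitrary constant, shown for and/add):

    #include <cassert>
    #include <cstdint>

    int main() {
      const int32_t C = 0x1234;
      for (int Bit = 0; Bit < 2; ++Bit) {
        bool X = (Bit != 0);
        int32_t Sext = X ? -1 : 0; // sext i1 X to i32 is 0 or all-ones
        // bo (sext X), C == select X, (bo -1, C), (bo 0, C); checked for and/add.
        assert((Sext & C) == (X ? (-1 & C) : (0 & C)));
        assert((Sext + C) == (X ? (-1 + C) : (0 + C)));
      }
      return 0;
    }
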
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index b56329ad76ae..bd2dc8d639fc 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -6,7 +6,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file is a part of AddressSanitizer, an address sanity checker.
+// This file is a part of AddressSanitizer, an address basic correctness
+// checker.
// Details of the algorithm:
// https://github.com/google/sanitizers/wiki/AddressSanitizerAlgorithm
//
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index 62c265e40dab..8d3bc1383e96 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -7,8 +7,8 @@
//===----------------------------------------------------------------------===//
//
/// \file
-/// This file is a part of HWAddressSanitizer, an address sanity checker
-/// based on tagged addressing.
+/// This file is a part of HWAddressSanitizer, an address basic correctness
+/// checker based on tagged addressing.
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 36a66e096382..d1d3b8ffdf7a 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -64,10 +64,10 @@ cl::opt<bool> DoHashBasedCounterSplit(
cl::desc("Rename counter variable of a comdat function based on cfg hash"),
cl::init(true));
-cl::opt<bool> RuntimeCounterRelocation(
- "runtime-counter-relocation",
- cl::desc("Enable relocating counters at runtime."),
- cl::init(false));
+cl::opt<bool>
+ RuntimeCounterRelocation("runtime-counter-relocation",
+ cl::desc("Enable relocating counters at runtime."),
+ cl::init(false));
cl::opt<bool> ValueProfileStaticAlloc(
"vp-static-alloc",
@@ -331,8 +331,9 @@ private:
// Check whether the loop satisfies the basic conditions needed to perform
// Counter Promotions.
- bool isPromotionPossible(Loop *LP,
- const SmallVectorImpl<BasicBlock *> &LoopExitBlocks) {
+ bool
+ isPromotionPossible(Loop *LP,
+ const SmallVectorImpl<BasicBlock *> &LoopExitBlocks) {
// We can't insert into a catchswitch.
if (llvm::any_of(LoopExitBlocks, [](BasicBlock *Exit) {
return isa<CatchSwitchInst>(Exit->getTerminator());
@@ -421,13 +422,13 @@ PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) {
}
char InstrProfilingLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(
- InstrProfilingLegacyPass, "instrprof",
- "Frontend instrumentation-based coverage lowering.", false, false)
+INITIALIZE_PASS_BEGIN(InstrProfilingLegacyPass, "instrprof",
+ "Frontend instrumentation-based coverage lowering.",
+ false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(
- InstrProfilingLegacyPass, "instrprof",
- "Frontend instrumentation-based coverage lowering.", false, false)
+INITIALIZE_PASS_END(InstrProfilingLegacyPass, "instrprof",
+ "Frontend instrumentation-based coverage lowering.", false,
+ false)
ModulePass *
llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options,
@@ -634,13 +635,9 @@ void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
GlobalVariable *Name = Ind->getName();
uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
uint64_t Index = Ind->getIndex()->getZExtValue();
- auto It = ProfileDataMap.find(Name);
- if (It == ProfileDataMap.end()) {
- PerFunctionProfileData PD;
- PD.NumValueSites[ValueKind] = Index + 1;
- ProfileDataMap[Name] = PD;
- } else if (It->second.NumValueSites[ValueKind] <= Index)
- It->second.NumValueSites[ValueKind] = Index + 1;
+ auto &PD = ProfileDataMap[Name];
+ PD.NumValueSites[ValueKind] =
+ std::max(PD.NumValueSites[ValueKind], (uint32_t)(Index + 1));
}
void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
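
The simplification above works because map subscripting value-initializes a missing entry, so the separate new-entry and existing-entry branches collapse into a single std::max. The same idiom with a plain std::map, as an illustrative stand-in for ProfileDataMap:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <map>
    #include <string>

    int main() {
      std::map<std::string, uint32_t> NumValueSites;
      auto Note = [&](const std::string &Name, uint64_t Index) {
        // operator[] creates a zero-initialized entry on first use.
        uint32_t &N = NumValueSites[Name];
        N = std::max<uint32_t>(N, uint32_t(Index + 1));
      };
      Note("foo", 3);
      Note("foo", 1);
      assert(NumValueSites["foo"] == 4);
      return 0;
    }
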
@@ -703,14 +700,15 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
LoadInst *LI = dyn_cast<LoadInst>(&I);
if (!LI) {
IRBuilder<> Builder(&I);
- GlobalVariable *Bias = M->getGlobalVariable(getInstrProfCounterBiasVarName());
+ GlobalVariable *Bias =
+ M->getGlobalVariable(getInstrProfCounterBiasVarName());
if (!Bias) {
// Compiler must define this variable when runtime counter relocation
// is being used. Runtime has a weak external reference that is used
// to check whether that's the case or not.
- Bias = new GlobalVariable(*M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage,
- Constant::getNullValue(Int64Ty),
- getInstrProfCounterBiasVarName());
+ Bias = new GlobalVariable(
+ *M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage,
+ Constant::getNullValue(Int64Ty), getInstrProfCounterBiasVarName());
Bias->setVisibility(GlobalVariable::HiddenVisibility);
// A definition that's weak (linkonce_odr) without being in a COMDAT
// section wouldn't lead to link errors, but it would lead to a dead
@@ -839,8 +837,7 @@ static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
return false;
// Use linker script magic to get data/cnts/name start/end.
if (TT.isOSLinux() || TT.isOSFreeBSD() || TT.isOSNetBSD() ||
- TT.isOSSolaris() || TT.isOSFuchsia() || TT.isPS4CPU() ||
- TT.isOSWindows())
+ TT.isOSSolaris() || TT.isOSFuchsia() || TT.isPS4CPU() || TT.isOSWindows())
return false;
return true;
@@ -849,13 +846,9 @@ static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
GlobalVariable *
InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
GlobalVariable *NamePtr = Inc->getName();
- auto It = ProfileDataMap.find(NamePtr);
- PerFunctionProfileData PD;
- if (It != ProfileDataMap.end()) {
- if (It->second.RegionCounters)
- return It->second.RegionCounters;
- PD = It->second;
- }
+ auto &PD = ProfileDataMap[NamePtr];
+ if (PD.RegionCounters)
+ return PD.RegionCounters;
// Match the linkage and visibility of the name global.
Function *Fn = Inc->getParent()->getParent();
@@ -922,6 +915,7 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
CounterPtr->setAlignment(Align(8));
MaybeSetComdat(CounterPtr);
CounterPtr->setLinkage(Linkage);
+ PD.RegionCounters = CounterPtr;
auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
// Allocate statically the array of pointers to value profile nodes for
@@ -1000,9 +994,7 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
MaybeSetComdat(Data);
Data->setLinkage(Linkage);
- PD.RegionCounters = CounterPtr;
PD.DataVar = Data;
- ProfileDataMap[NamePtr] = PD;
// Mark the data variable as used so that it isn't stripped out.
CompilerUsedVars.push_back(Data);
@@ -1013,7 +1005,7 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
// Collect the referenced names to be used by emitNameData.
ReferencedNames.push_back(NamePtr);
- return CounterPtr;
+ return PD.RegionCounters;
}
void InstrProfiling::emitVNodes() {
@@ -1078,8 +1070,8 @@ void InstrProfiling::emitNameData() {
}
auto &Ctx = M->getContext();
- auto *NamesVal = ConstantDataArray::getString(
- Ctx, StringRef(CompressedNameStr), false);
+ auto *NamesVal =
+ ConstantDataArray::getString(Ctx, StringRef(CompressedNameStr), false);
NamesVar = new GlobalVariable(*M, NamesVal->getType(), true,
GlobalValue::PrivateLinkage, NamesVal,
getInstrProfNamesVarName());
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index f98e39d751f4..180012198c42 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -110,7 +110,7 @@ namespace {
/// the module.
struct ThreadSanitizer {
ThreadSanitizer() {
- // Sanity check options and warn user.
+ // Check options and warn user.
if (ClInstrumentReadBeforeWrite && ClCompoundReadBeforeWrite) {
errs()
<< "warning: Option -tsan-compound-read-before-write has no effect "
diff --git a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
index 74e4eb07b219..4921209f041b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
@@ -94,11 +94,9 @@ bool llvm::objcarc::CanUse(const Instruction *Inst, const Value *Ptr,
return false;
} else if (const auto *CS = dyn_cast<CallBase>(Inst)) {
// For calls, just check the arguments (and not the callee operand).
- for (auto OI = CS->arg_begin(), OE = CS->arg_end(); OI != OE; ++OI) {
- const Value *Op = *OI;
+ for (const Value *Op : CS->args())
if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op))
return true;
- }
return false;
} else if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// Special-case stores, because we don't care about the stored value, just
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index ca9567dc7ac8..a3fd97079b1d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -52,6 +52,11 @@ using namespace llvm;
#define DEBUG_TYPE "correlated-value-propagation"
+static cl::opt<bool> CanonicalizeICmpPredicatesToUnsigned(
+ "canonicalize-icmp-predicates-to-unsigned", cl::init(true), cl::Hidden,
+ cl::desc("Enables canonicalization of signed relational predicates to "
+ "unsigned (e.g. sgt => ugt)"));
+
STATISTIC(NumPhis, "Number of phis propagated");
STATISTIC(NumPhiCommon, "Number of phis deleted via common incoming value");
STATISTIC(NumSelects, "Number of selects propagated");
@@ -64,7 +69,8 @@ STATISTIC(NumSDivSRemsNarrowed,
STATISTIC(NumSDivs, "Number of sdiv converted to udiv");
STATISTIC(NumUDivURemsNarrowed,
"Number of udivs/urems whose width was decreased");
-STATISTIC(NumAShrs, "Number of ashr converted to lshr");
+STATISTIC(NumAShrsConverted, "Number of ashr converted to lshr");
+STATISTIC(NumAShrsRemoved, "Number of ashr removed");
STATISTIC(NumSRems, "Number of srem converted to urem");
STATISTIC(NumSExt, "Number of sext converted to zext");
STATISTIC(NumSICmps, "Number of signed icmp preds simplified to unsigned");
@@ -297,6 +303,9 @@ static bool processMemAccess(Instruction *I, LazyValueInfo *LVI) {
}
static bool processICmp(ICmpInst *Cmp, LazyValueInfo *LVI) {
+ if (!CanonicalizeICmpPredicatesToUnsigned)
+ return false;
+
// Only for signed relational comparisons of scalar integers.
if (Cmp->getType()->isVectorTy() ||
!Cmp->getOperand(0)->getType()->isIntegerTy())
@@ -376,13 +385,7 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
// ConstantFoldTerminator() as the underlying SwitchInst can be changed.
SwitchInstProfUpdateWrapper SI(*I);
- APInt Low =
- APInt::getSignedMaxValue(Cond->getType()->getScalarSizeInBits());
- APInt High =
- APInt::getSignedMinValue(Cond->getType()->getScalarSizeInBits());
-
- SwitchInst::CaseIt CI = SI->case_begin();
- for (auto CE = SI->case_end(); CI != CE;) {
+ for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) {
ConstantInt *Case = CI->getCaseValue();
LazyValueInfo::Tristate State =
LVI->getPredicateAt(CmpInst::ICMP_EQ, Cond, Case, I,
@@ -415,28 +418,9 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
break;
}
- // Get Lower/Upper bound from switch cases.
- Low = APIntOps::smin(Case->getValue(), Low);
- High = APIntOps::smax(Case->getValue(), High);
-
// Increment the case iterator since we didn't delete it.
++CI;
}
-
- // Try to simplify default case as unreachable
- if (CI == SI->case_end() && SI->getNumCases() != 0 &&
- !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg())) {
- const ConstantRange SIRange =
- LVI->getConstantRange(SI->getCondition(), SI);
-
- // If the numbered switch cases cover the entire range of the condition,
- // then the default case is not reachable.
- if (SIRange.getSignedMin() == Low && SIRange.getSignedMax() == High &&
- SI->getNumCases() == High - Low + 1) {
- createUnreachableSwitchDefault(SI, &DTU);
- Changed = true;
- }
- }
}
if (Changed)
@@ -688,7 +672,7 @@ static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) {
ArgNo++;
}
- assert(ArgNo == CB.arg_size() && "sanity check");
+ assert(ArgNo == CB.arg_size() && "Call arguments not processed correctly.");
if (ArgNos.empty())
return Changed;
@@ -954,10 +938,22 @@ static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) {
if (SDI->getType()->isVectorTy())
return false;
+ ConstantRange LRange = LVI->getConstantRange(SDI->getOperand(0), SDI);
+ unsigned OrigWidth = SDI->getType()->getIntegerBitWidth();
+ ConstantRange NegOneOrZero =
+ ConstantRange(APInt(OrigWidth, (uint64_t)-1, true), APInt(OrigWidth, 1));
+ if (NegOneOrZero.contains(LRange)) {
+ // ashr of -1 or 0 never changes the value, so drop the whole instruction
+ ++NumAShrsRemoved;
+ SDI->replaceAllUsesWith(SDI->getOperand(0));
+ SDI->eraseFromParent();
+ return true;
+ }
+
if (!isNonNegative(SDI->getOperand(0), LVI, SDI))
return false;
- ++NumAShrs;
+ ++NumAShrsConverted;
auto *BO = BinaryOperator::CreateLShr(SDI->getOperand(0), SDI->getOperand(1),
SDI->getName(), SDI);
BO->setDebugLoc(SDI->getDebugLoc());
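
The new removal path uses the fact that an arithmetic shift of -1 or 0 changes nothing, so when LVI proves the LHS lies in [-1, 0] the ashr can be dropped. A tiny standalone check (relying on arithmetic right shift of signed values, which is guaranteed since C++20 and the universal behavior before that):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned Shift = 0; Shift < 32; ++Shift) {
        // ashr replicates the sign bit, so -1 stays -1 and 0 stays 0.
        assert((int64_t(-1) >> Shift) == -1);
        assert((int64_t(0) >> Shift) == 0);
      }
      return 0;
    }
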
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index a8ec8bb97970..e0d3a6accadd 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -159,52 +159,22 @@ static cl::opt<unsigned> MemorySSAPathCheckLimit(
cl::desc("The maximum number of blocks to check when trying to prove that "
"all paths to an exit go through a killing block (default = 50)"));
+// This flag allows or disallows DSE to optimize MemorySSA during its
+// traversal. Note that DSE optimizing MemorySSA may impact other passes
+// downstream of the DSE invocation and can lead to issues not being
+// reproducible in isolation (i.e. when MemorySSA is built from scratch). In
+// those cases, the flag can be used to check if DSE's MemorySSA optimizations
+// impact follow-up passes.
+static cl::opt<bool>
+ OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
+ cl::desc("Allow DSE to optimize memory accesses."));
+
//===----------------------------------------------------------------------===//
// Helper functions
//===----------------------------------------------------------------------===//
using OverlapIntervalsTy = std::map<int64_t, int64_t>;
using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>;
-/// Does this instruction write some memory? This only returns true for things
-/// that we can analyze with other helpers below.
-static bool hasAnalyzableMemoryWrite(Instruction *I,
- const TargetLibraryInfo &TLI) {
- if (isa<StoreInst>(I))
- return true;
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
- switch (II->getIntrinsicID()) {
- default:
- return false;
- case Intrinsic::memset:
- case Intrinsic::memmove:
- case Intrinsic::memcpy:
- case Intrinsic::memcpy_inline:
- case Intrinsic::memcpy_element_unordered_atomic:
- case Intrinsic::memmove_element_unordered_atomic:
- case Intrinsic::memset_element_unordered_atomic:
- case Intrinsic::init_trampoline:
- case Intrinsic::lifetime_end:
- case Intrinsic::masked_store:
- return true;
- }
- }
- if (auto *CB = dyn_cast<CallBase>(I)) {
- LibFunc LF;
- if (TLI.getLibFunc(*CB, LF) && TLI.has(LF)) {
- switch (LF) {
- case LibFunc_strcpy:
- case LibFunc_strncpy:
- case LibFunc_strcat:
- case LibFunc_strncat:
- return true;
- default:
- return false;
- }
- }
- }
- return false;
-}
-
/// If the value of this instruction and the memory it writes to is unused, may
/// we delete this instruction?
static bool isRemovable(Instruction *I) {
@@ -214,7 +184,7 @@ static bool isRemovable(Instruction *I) {
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
- default: llvm_unreachable("doesn't pass 'hasAnalyzableMemoryWrite' predicate");
+ default: llvm_unreachable("Does not have LocForWrite");
case Intrinsic::lifetime_end:
// Never remove dead lifetime_end's, e.g. because it is followed by a
// free.
@@ -296,6 +266,7 @@ enum OverwriteResult {
OW_End,
OW_PartialEarlierWithFullLater,
OW_MaybePartial,
+ OW_None,
OW_Unknown
};
@@ -841,7 +812,7 @@ struct DSEState {
/// Keep track of instructions (partly) overlapping with killing MemoryDefs per
/// basic block.
- DenseMap<BasicBlock *, InstOverlapIntervalsTy> IOLs;
+ MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs;
// Class contains self-reference, make sure it's not copied/moved.
DSEState(const DSEState &) = delete;
@@ -889,6 +860,7 @@ struct DSEState {
/// Return OW_MaybePartial if \p KillingI does not completely overwrite
/// \p DeadI, but they both write to the same underlying object. In that
/// case, use isPartialOverwrite to check if \p KillingI partially overwrites
+ /// \p DeadI. Returns 'OW_None' if \p KillingI is known to not overwrite
/// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
OverwriteResult isOverwrite(const Instruction *KillingI,
const Instruction *DeadI,
@@ -951,8 +923,16 @@ struct DSEState {
// If we can't resolve the same pointers to the same object, then we can't
// analyze them at all.
- if (DeadUndObj != KillingUndObj)
+ if (DeadUndObj != KillingUndObj) {
+ // Non-aliasing stores to different objects don't overlap. Note that
+ // if the killing store is known to overwrite the whole object (an
+ // out-of-bounds access overwrites the whole object as well) then it is
+ // assumed to completely overwrite any store to the same object even if
+ // they don't actually alias (see next check).
+ if (AAR == AliasResult::NoAlias)
+ return OW_None;
return OW_Unknown;
+ }
// If the KillingI store is to a recognizable object, get its size.
uint64_t KillingUndObjSize = getPointerSize(KillingUndObj, DL, TLI, &F);
@@ -1006,9 +986,8 @@ struct DSEState {
return OW_MaybePartial;
}
- // Can reach here only if accesses are known not to overlap. There is no
- // dedicated code to indicate no overlap so signal "unknown".
- return OW_Unknown;
+ // Can reach here only if accesses are known not to overlap.
+ return OW_None;
}
bool isInvisibleToCallerAfterRet(const Value *V) {
@@ -1304,6 +1283,15 @@ struct DSEState {
Instruction *KillingI = KillingDef->getMemoryInst();
LLVM_DEBUG(dbgs() << " trying to get dominating access\n");
+ // Only optimize defining access of KillingDef when directly starting at its
+ // defining access. The defining access also must only access KillingLoc. At
+ // the moment we only support instructions with a single write location, so
+ // it should be sufficient to disable optimizations for instructions that
+ // also read from memory.
+ bool CanOptimize = OptimizeMemorySSA &&
+ KillingDef->getDefiningAccess() == StartAccess &&
+ !KillingI->mayReadFromMemory();
+
// Find the next clobbering Mod access for DefLoc, starting at StartAccess.
Optional<MemoryLocation> CurrentLoc;
for (;; Current = cast<MemoryDef>(Current)->getDefiningAccess()) {
@@ -1345,8 +1333,10 @@ struct DSEState {
Instruction *CurrentI = CurrentDef->getMemoryInst();
if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(KillingUndObj),
- TLI))
+ TLI)) {
+ CanOptimize = false;
continue;
+ }
// Before we try to remove anything, check for any extra throwing
// instructions that block us from DSEing
@@ -1380,15 +1370,13 @@ struct DSEState {
return None;
}
- // If Current cannot be analyzed or is not removable, check the next
- // candidate.
- if (!hasAnalyzableMemoryWrite(CurrentI, TLI) || !isRemovable(CurrentI))
- continue;
-
- // If Current does not have an analyzable write location, skip it
+ // If Current does not have an analyzable write location or is not
+ // removable, skip it.
CurrentLoc = getLocForWriteEx(CurrentI);
- if (!CurrentLoc)
+ if (!CurrentLoc || !isRemovable(CurrentI)) {
+ CanOptimize = false;
continue;
+ }
// AliasAnalysis does not account for loops. Limit elimination to
// candidates for which we can guarantee they always store to the same
@@ -1396,6 +1384,7 @@ struct DSEState {
if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
WalkerStepLimit -= 1;
+ CanOptimize = false;
continue;
}
@@ -1403,16 +1392,32 @@ struct DSEState {
// If the killing def is a memory terminator (e.g. lifetime.end), check
// the next candidate if the current Current does not write the same
// underlying object as the terminator.
- if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI))
+ if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {
+ CanOptimize = false;
continue;
+ }
} else {
int64_t KillingOffset = 0;
int64_t DeadOffset = 0;
auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,
KillingOffset, DeadOffset);
+ if (CanOptimize) {
+ // CurrentDef is the earliest write clobber of KillingDef. Use it as
+ // optimized access. Do not optimize if CurrentDef is already the
+ // defining access of KillingDef.
+ if (CurrentDef != KillingDef->getDefiningAccess() &&
+ (OR == OW_Complete || OR == OW_MaybePartial))
+ KillingDef->setOptimized(CurrentDef);
+
+ // Once a may-aliasing def is encountered do not set an optimized
+ // access.
+ if (OR != OW_None)
+ CanOptimize = false;
+ }
+
// If Current does not write to the same object as KillingDef, check
// the next candidate.
- if (OR == OW_Unknown)
+ if (OR == OW_Unknown || OR == OW_None)
continue;
else if (OR == OW_MaybePartial) {
// If KillingDef only partially overwrites Current, check the next
@@ -1421,6 +1426,7 @@ struct DSEState {
// which are less likely to be removable in the end.
if (PartialLimit <= 1) {
WalkerStepLimit -= 1;
+ LLVM_DEBUG(dbgs() << " ... reached partial limit ... continue with next access\n");
continue;
}
PartialLimit -= 1;
@@ -1922,7 +1928,14 @@ struct DSEState {
if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def) ||
!isRemovable(Def->getMemoryInst()))
continue;
- auto *UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
+ MemoryDef *UpperDef;
+ // To conserve compile-time, we avoid walking to the next clobbering def.
+ // Instead, we just try to get the optimized access, if it exists. DSE
+ // will try to optimize defs during the earlier traversal.
+ if (Def->isOptimized())
+ UpperDef = dyn_cast<MemoryDef>(Def->getOptimized());
+ else
+ UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
continue;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index ae2fe2767074..7001d330fce0 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1951,7 +1951,6 @@ bool IndVarSimplify::run(Loop *L) {
// using it.
if (!DisableLFTR) {
BasicBlock *PreHeader = L->getLoopPreheader();
- BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
SmallVector<BasicBlock*, 16> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
@@ -1987,7 +1986,7 @@ bool IndVarSimplify::run(Loop *L) {
// Avoid high cost expansions. Note: This heuristic is questionable in
// that our definition of "high cost" is not exactly principled.
if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
- TTI, PreHeaderBR))
+ TTI, PreHeader->getTerminator()))
continue;
// Check preconditions for proper SCEVExpander operation. SCEV does not
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp
index bf714d167670..6f97f3e93123 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -486,7 +486,7 @@ bool LoopInvariantCodeMotion::runOnLoop(
// Check that neither this loop nor its parent have had LCSSA broken. LICM is
// specifically moving instructions across the loop boundary and so it is
- // especially in need of sanity checking here.
+ // especially in need of basic functional correctness checking here.
assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
assert((L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) &&
"Parent loop not left in LCSSA form after LICM!");
@@ -1860,6 +1860,7 @@ class LoopPromoter : public LoadAndStorePromoter {
bool UnorderedAtomic;
AAMDNodes AATags;
ICFLoopSafetyInfo &SafetyInfo;
+ bool CanInsertStoresInExitBlocks;
// We're about to add a use of V in a loop exit block. Insert an LCSSA phi
// (if legal) if doing so would add an out-of-loop use to an instruction
@@ -1886,12 +1887,13 @@ public:
SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
MemorySSAUpdater *MSSAU, LoopInfo &li, DebugLoc dl,
Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
- ICFLoopSafetyInfo &SafetyInfo)
+ ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks)
: LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP),
PredCache(PIC), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
Alignment(Alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
- SafetyInfo(SafetyInfo) {}
+ SafetyInfo(SafetyInfo),
+ CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks) {}
bool isInstInList(Instruction *I,
const SmallVectorImpl<Instruction *> &) const override {
@@ -1903,7 +1905,7 @@ public:
return PointerMustAliases.count(Ptr);
}
- void doExtraRewritesBeforeFinalDeletion() override {
+ void insertStoresInLoopExitBlocks() {
// Insert stores after in the loop exit blocks. Each exit block gets a
// store of the live-out values that feed them. Since we've already told
// the SSA updater about the defs in the loop and the preheader
@@ -1937,10 +1939,21 @@ public:
}
}
+ void doExtraRewritesBeforeFinalDeletion() override {
+ if (CanInsertStoresInExitBlocks)
+ insertStoresInLoopExitBlocks();
+ }
+
void instructionDeleted(Instruction *I) const override {
SafetyInfo.removeInstruction(I);
MSSAU->removeMemoryAccess(I);
}
+
+ bool shouldDelete(Instruction *I) const override {
+ if (isa<StoreInst>(I))
+ return CanInsertStoresInExitBlocks;
+ return true;
+ }
};
bool isNotCapturedBeforeOrInLoop(const Value *V, const Loop *L,
@@ -2039,6 +2052,7 @@ bool llvm::promoteLoopAccessesToScalars(
bool DereferenceableInPH = false;
bool SafeToInsertStore = false;
+ bool FoundLoadToPromote = false;
SmallVector<Instruction *, 64> LoopUses;
@@ -2067,16 +2081,11 @@ bool llvm::promoteLoopAccessesToScalars(
IsKnownThreadLocalObject = !isa<AllocaInst>(Object);
}
- // Check that all of the pointers in the alias set have the same type. We
- // cannot (yet) promote a memory location that is loaded and stored in
+ // Check that all accesses to pointers in the alias set use the same type.
+ // We cannot (yet) promote a memory location that is loaded and stored in
// different sizes. While we are at it, collect alignment and AA info.
+ Type *AccessTy = nullptr;
for (Value *ASIV : PointerMustAliases) {
- // Check that all of the pointers in the alias set have the same type. We
- // cannot (yet) promote a memory location that is loaded and stored in
- // different sizes.
- if (SomePtr->getType() != ASIV->getType())
- return false;
-
for (User *U : ASIV->users()) {
// Ignore instructions that are outside the loop.
Instruction *UI = dyn_cast<Instruction>(U);
@@ -2091,6 +2100,7 @@ bool llvm::promoteLoopAccessesToScalars(
SawUnorderedAtomic |= Load->isAtomic();
SawNotAtomic |= !Load->isAtomic();
+ FoundLoadToPromote = true;
Align InstAlignment = Load->getAlign();
@@ -2153,6 +2163,11 @@ bool llvm::promoteLoopAccessesToScalars(
} else
return false; // Not a load or store.
+ if (!AccessTy)
+ AccessTy = getLoadStoreType(UI);
+ else if (AccessTy != getLoadStoreType(UI))
+ return false;
+
// Merge the AA tags.
if (LoopUses.empty()) {
// On the first load/store, just take its AA tags.
@@ -2175,9 +2190,7 @@ bool llvm::promoteLoopAccessesToScalars(
// If we're inserting an atomic load in the preheader, we must be able to
// lower it. We're only guaranteed to be able to lower naturally aligned
// atomics.
- auto *SomePtrElemType = SomePtr->getType()->getPointerElementType();
- if (SawUnorderedAtomic &&
- Alignment < MDL.getTypeStoreSize(SomePtrElemType))
+ if (SawUnorderedAtomic && Alignment < MDL.getTypeStoreSize(AccessTy))
return false;
// If we couldn't prove we can hoist the load, bail.
@@ -2199,13 +2212,20 @@ bool llvm::promoteLoopAccessesToScalars(
}
}
- // If we've still failed to prove we can sink the store, give up.
- if (!SafeToInsertStore)
+ // If we've still failed to prove we can sink the store, hoist the load
+ // only, if possible.
+ if (!SafeToInsertStore && !FoundLoadToPromote)
+ // If we cannot hoist the load either, give up.
return false;
- // Otherwise, this is safe to promote, lets do it!
- LLVM_DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr
- << '\n');
+ // Let's do the promotion!
+ if (SafeToInsertStore)
+ LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr
+ << '\n');
+ else
+ LLVM_DEBUG(dbgs() << "LICM: Promoting load of the value: " << *SomePtr
+ << '\n');
+
ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar",
LoopUses[0])
@@ -2224,13 +2244,14 @@ bool llvm::promoteLoopAccessesToScalars(
SSAUpdater SSA(&NewPHIs);
LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
InsertPts, MSSAInsertPts, PIC, MSSAU, *LI, DL,
- Alignment, SawUnorderedAtomic, AATags, *SafetyInfo);
+ Alignment, SawUnorderedAtomic, AATags, *SafetyInfo,
+ SafeToInsertStore);
// Set up the preheader to have a definition of the value. It is the live-out
// value from the preheader that uses in the loop will use.
LoadInst *PreheaderLoad = new LoadInst(
- SomePtr->getType()->getPointerElementType(), SomePtr,
- SomePtr->getName() + ".promoted", Preheader->getTerminator());
+ AccessTy, SomePtr, SomePtr->getName() + ".promoted",
+ Preheader->getTerminator());
if (SawUnorderedAtomic)
PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
PreheaderLoad->setAlignment(Alignment);
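
At the source level, the change above means LICM can still promote a loop-invariant load even when it cannot prove that sinking a store into the exit blocks is safe. A rough before/after sketch in plain C++ (illustrative only, not the pass; the real transform also requires the load to be dereferenceable in the preheader):

    #include <cstddef>

    // Before promotion: the loop reloads the same location every iteration.
    int sumBefore(const int *P, std::size_t N) {
      int Sum = 0;
      for (std::size_t I = 0; I < N; ++I)
        Sum += *P;
      return Sum;
    }

    // After load-only promotion: one load in the preheader feeds the loop, and
    // since nothing stores through P, no store needs to be sunk into the exits.
    int sumAfter(const int *P, std::size_t N) {
      int Promoted = *P; // corresponds to the ".promoted" preheader load
      int Sum = 0;
      for (std::size_t I = 0; I < N; ++I)
        Sum += Promoted;
      return Sum;
    }
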
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
index 3df4cfe8e4c1..6c783848432b 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
@@ -49,9 +49,17 @@ void PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &,
LPMUpdater &>::printPipeline(raw_ostream &OS,
function_ref<StringRef(StringRef)>
MapClassName2PassName) {
- for (unsigned Idx = 0, Size = LoopPasses.size(); Idx != Size; ++Idx) {
- auto *P = LoopPasses[Idx].get();
- P->printPipeline(OS, MapClassName2PassName);
+ assert(LoopPasses.size() + LoopNestPasses.size() == IsLoopNestPass.size());
+
+ unsigned IdxLP = 0, IdxLNP = 0;
+ for (unsigned Idx = 0, Size = IsLoopNestPass.size(); Idx != Size; ++Idx) {
+ if (IsLoopNestPass[Idx]) {
+ auto *P = LoopNestPasses[IdxLNP++].get();
+ P->printPipeline(OS, MapClassName2PassName);
+ } else {
+ auto *P = LoopPasses[IdxLP++].get();
+ P->printPipeline(OS, MapClassName2PassName);
+ }
if (Idx + 1 < Size)
OS << ",";
}
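
The printing fix above reconstructs registration order by walking a flag vector and pulling the next element from whichever list the flag selects; the same pattern in isolation (the names here are made up for the sketch):

    #include <cassert>
    #include <string>
    #include <vector>

    // Merge two separately stored lists back into their original order using a
    // parallel flag vector (true = the element came from the second list).
    std::vector<std::string> interleave(const std::vector<std::string> &Loop,
                                        const std::vector<std::string> &Nest,
                                        const std::vector<bool> &IsNest) {
      assert(Loop.size() + Nest.size() == IsNest.size());
      std::vector<std::string> Out;
      unsigned IdxLP = 0, IdxLNP = 0;
      for (bool FromNest : IsNest)
        Out.push_back(FromNest ? Nest[IdxLNP++] : Loop[IdxLP++]);
      return Out;
    }
    // interleave({"licm"}, {"loop-unroll"}, {true, false}) == {"loop-unroll", "licm"}.
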
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index a87843d658a9..728d63fe2847 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -256,8 +256,8 @@ private:
}
}
- // Sanity check: amount of dead and live loop blocks should match the total
- // number of blocks in loop.
+ // Amount of dead and live loop blocks should match the total number of
+ // blocks in loop.
assert(L.getNumBlocks() == LiveLoopBlocks.size() + DeadLoopBlocks.size() &&
"Malformed block sets?");
@@ -305,7 +305,6 @@ private:
BlocksInLoopAfterFolding.insert(BB);
}
- // Sanity check: header must be in loop.
assert(BlocksInLoopAfterFolding.count(L.getHeader()) &&
"Header not in loop?");
assert(BlocksInLoopAfterFolding.size() <= LiveLoopBlocks.size() &&
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 67702520511b..39c8b65968aa 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -806,28 +806,27 @@ static Optional<unsigned> shouldFullUnroll(
ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE,
const TargetTransformInfo::UnrollingPreferences &UP) {
+ assert(FullUnrollTripCount && "should be non-zero!");
- if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
- // When computing the unrolled size, note that BEInsns are not replicated
- // like the rest of the loop body.
- if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) {
- return FullUnrollTripCount;
+ if (FullUnrollTripCount > UP.FullUnrollMaxCount)
+ return None;
- } else {
- // The loop isn't that small, but we still can fully unroll it if that
- // helps to remove a significant number of instructions.
- // To check that, run additional analysis on the loop.
- if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
- L, FullUnrollTripCount, DT, SE, EphValues, TTI,
- UP.Threshold * UP.MaxPercentThresholdBoost / 100,
- UP.MaxIterationsCountToAnalyze)) {
- unsigned Boost =
- getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);
- if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
- return FullUnrollTripCount;
- }
- }
- }
+ // When computing the unrolled size, note that BEInsns are not replicated
+ // like the rest of the loop body.
+ if (UCE.getUnrolledLoopSize(UP) < UP.Threshold)
+ return FullUnrollTripCount;
+
+ // The loop isn't that small, but we still can fully unroll it if that
+ // helps to remove a significant number of instructions.
+ // To check that, run additional analysis on the loop.
+ if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
+ L, FullUnrollTripCount, DT, SE, EphValues, TTI,
+ UP.Threshold * UP.MaxPercentThresholdBoost / 100,
+ UP.MaxIterationsCountToAnalyze)) {
+ unsigned Boost =
+ getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);
+ if (Cost->UnrolledCost < UP.Threshold * Boost / 100)
+ return FullUnrollTripCount;
}
return None;
}
@@ -837,51 +836,48 @@ shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount,
const UnrollCostEstimator UCE,
const TargetTransformInfo::UnrollingPreferences &UP) {
+ if (!TripCount)
+ return None;
+
+ if (!UP.Partial) {
+ LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
+ << "-unroll-allow-partial not given\n");
+ return 0;
+ }
unsigned count = UP.Count;
- if (TripCount) {
- if (!UP.Partial) {
- LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
- << "-unroll-allow-partial not given\n");
- count = 0;
- return count;
- }
- if (count == 0)
- count = TripCount;
- if (UP.PartialThreshold != NoThreshold) {
- // Reduce unroll count to be modulo of TripCount for partial unrolling.
- if (UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold)
- count = (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
- (LoopSize - UP.BEInsns);
- if (count > UP.MaxCount)
- count = UP.MaxCount;
- while (count != 0 && TripCount % count != 0)
- count--;
- if (UP.AllowRemainder && count <= 1) {
- // If there is no Count that is modulo of TripCount, set Count to
- // largest power-of-two factor that satisfies the threshold limit.
- // As we'll create fixup loop, do the type of unrolling only if
- // remainder loop is allowed.
- count = UP.DefaultUnrollRuntimeCount;
- while (count != 0 &&
- UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold)
- count >>= 1;
- }
- if (count < 2) {
- count = 0;
- }
- } else {
- count = TripCount;
- }
+ if (count == 0)
+ count = TripCount;
+ if (UP.PartialThreshold != NoThreshold) {
+ // Reduce unroll count to be modulo of TripCount for partial unrolling.
+ if (UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold)
+ count = (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
+ (LoopSize - UP.BEInsns);
if (count > UP.MaxCount)
count = UP.MaxCount;
-
- LLVM_DEBUG(dbgs() << " partially unrolling with count: " << count << "\n");
-
- return count;
+ while (count != 0 && TripCount % count != 0)
+ count--;
+ if (UP.AllowRemainder && count <= 1) {
+ // If there is no Count that is modulo of TripCount, set Count to
+ // largest power-of-two factor that satisfies the threshold limit.
+ // As we'll create fixup loop, do the type of unrolling only if
+ // remainder loop is allowed.
+ count = UP.DefaultUnrollRuntimeCount;
+ while (count != 0 &&
+ UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold)
+ count >>= 1;
+ }
+ if (count < 2) {
+ count = 0;
+ }
+ } else {
+ count = TripCount;
}
+ if (count > UP.MaxCount)
+ count = UP.MaxCount;
- // if didn't return until here, should continue to other priorties
- return None;
+ LLVM_DEBUG(dbgs() << " partially unrolling with count: " << count << "\n");
+
+ return count;
}
// Returns true if unroll count was set explicitly.
// Calculates unroll count and writes it to UP.Count.
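
Stripped of the option plumbing, the partial-unroll choice above picks the largest count that divides the trip count while keeping the unrolled size under the threshold. A simplified standalone model follows; the size formula and parameter names are assumptions, not the exact heuristic, and the power-of-two remainder fallback is omitted:

    #include <algorithm>

    // Returns 0 when partial unrolling is not worthwhile. Assumes LoopSize > BEInsns.
    unsigned pickPartialCount(unsigned TripCount, unsigned LoopSize,
                              unsigned BEInsns, unsigned PartialThreshold,
                              unsigned MaxCount) {
      // Model: back-edge instructions are not replicated by unrolling.
      auto UnrolledSize = [&](unsigned C) {
        return (LoopSize - BEInsns) * C + BEInsns;
      };
      unsigned Count = TripCount;
      if (UnrolledSize(Count) > PartialThreshold)
        Count = (std::max(PartialThreshold, BEInsns + 1) - BEInsns) /
                (LoopSize - BEInsns);
      Count = std::min(Count, MaxCount);
      while (Count != 0 && TripCount % Count != 0)
        --Count; // keep the count an exact divisor of the trip count
      return Count < 2 ? 0 : Count;
    }
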
@@ -900,7 +896,6 @@ bool llvm::computeUnrollCount(
TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) {
UnrollCostEstimator UCE(*L, LoopSize);
- Optional<unsigned> UnrollFactor;
const bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
const bool PragmaFullUnroll = hasUnrollFullPragma(L);
@@ -926,9 +921,8 @@ bool llvm::computeUnrollCount(
// Check for explicit Count.
// 1st priority is unroll count set by "unroll-count" option.
// 2nd priority is unroll count set by pragma.
- UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount, UCE, UP);
-
- if (UnrollFactor) {
+ if (auto UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount,
+ UCE, UP)) {
UP.Count = *UnrollFactor;
if (UserUnrollCount || (PragmaCount > 0)) {
@@ -948,11 +942,20 @@ bool llvm::computeUnrollCount(
}
}
- // 3rd priority is full unroll count.
- // Full unroll makes sense only when TripCount or its upper bound could be
- // statically calculated.
- // Also we need to check if we exceed FullUnrollMaxCount.
+ // 3rd priority is exact full unrolling. This will eliminate all copies
+ // of some exit test.
+ UP.Count = 0;
+ if (TripCount) {
+ UP.Count = TripCount;
+ if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues,
+ TripCount, UCE, UP)) {
+ UP.Count = *UnrollFactor;
+ UseUpperBound = false;
+ return ExplicitUnroll;
+ }
+ }
+ // 4th priority is bounded unrolling.
// We can unroll by the upper bound amount if it's generally allowed or if
// we know that the loop is executed either the upper bound or zero times.
// (MaxOrZero unrolling keeps only the first loop test, so the number of
@@ -961,37 +964,21 @@ bool llvm::computeUnrollCount(
// number of loop tests goes up which may end up being worse on targets with
// constrained branch predictor resources so is controlled by an option.)
// In addition we only unroll small upper bounds.
- unsigned FullUnrollMaxTripCount = MaxTripCount;
- if (!(UP.UpperBound || MaxOrZero) ||
- FullUnrollMaxTripCount > UnrollMaxUpperBound)
- FullUnrollMaxTripCount = 0;
-
- // UnrollByMaxCount and ExactTripCount cannot both be non zero since we only
- // compute the former when the latter is zero.
- unsigned ExactTripCount = TripCount;
- assert((ExactTripCount == 0 || FullUnrollMaxTripCount == 0) &&
- "ExtractTripCount and UnrollByMaxCount cannot both be non zero.");
-
- unsigned FullUnrollTripCount =
- ExactTripCount ? ExactTripCount : FullUnrollMaxTripCount;
- UP.Count = FullUnrollTripCount;
-
- UnrollFactor =
- shouldFullUnroll(L, TTI, DT, SE, EphValues, FullUnrollTripCount, UCE, UP);
-
- // if shouldFullUnroll can do the unrolling, some side parameteres should be
- // set
- if (UnrollFactor) {
- UP.Count = *UnrollFactor;
- UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
- TripCount = FullUnrollTripCount;
- TripMultiple = UP.UpperBound ? 1 : TripMultiple;
- return ExplicitUnroll;
- } else {
- UP.Count = FullUnrollTripCount;
+ // Note that the cost of bounded unrolling is always strictly greater than
+ // the cost of exact full unrolling. As such, if we have an exact count and
+ // found it unprofitable, we'll never choose bounded unrolling.
+ if (!TripCount && MaxTripCount && (UP.UpperBound || MaxOrZero) &&
+ MaxTripCount <= UnrollMaxUpperBound) {
+ UP.Count = MaxTripCount;
+ if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues,
+ MaxTripCount, UCE, UP)) {
+ UP.Count = *UnrollFactor;
+ UseUpperBound = true;
+ return ExplicitUnroll;
+ }
}
- // 4th priority is loop peeling.
+ // 5th priority is loop peeling.
computePeelCount(L, LoopSize, PP, TripCount, DT, SE, UP.Threshold);
if (PP.PeelCount) {
UP.Runtime = false;
@@ -1004,11 +991,9 @@ bool llvm::computeUnrollCount(
if (TripCount)
UP.Partial |= ExplicitUnroll;
- // 5th priority is partial unrolling.
+ // 6th priority is partial unrolling.
// Try partial unroll only when TripCount could be statically calculated.
- UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP);
-
- if (UnrollFactor) {
+ if (auto UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP)) {
UP.Count = *UnrollFactor;
if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
@@ -1049,7 +1034,7 @@ bool llvm::computeUnrollCount(
"because loop has a runtime trip count.";
});
- // 6th priority is runtime unrolling.
+ // 7th priority is runtime unrolling.
// Don't unroll a runtime trip count loop when it is disabled.
if (hasRuntimeUnrollDisablePragma(L)) {
UP.Count = 0;
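[Editorial aside, not part of the patch: the count-reduction arithmetic in shouldPartialUnroll above is compact, so here is a minimal standalone C++ sketch of the same idea. The helper name pickPartialUnrollCount is hypothetical, and the size formula (LoopSize - BEInsns) * Count + BEInsns is an assumption consistent with the threshold division shown above; the real code additionally clamps to UP.MaxCount and can fall back to a power-of-two count when a remainder loop is allowed.]

    #include <algorithm>
    #include <cstdio>

    // Hypothetical helper mirroring the reduction above: shrink the unroll count
    // until the unrolled body fits the threshold and the count divides TripCount.
    unsigned pickPartialUnrollCount(unsigned LoopSize, unsigned TripCount,
                                    unsigned Threshold, unsigned BEInsns) {
      unsigned Count = TripCount; // start from the full trip count
      auto UnrolledSize = [&](unsigned C) {
        return (LoopSize - BEInsns) * C + BEInsns;
      };
      if (UnrolledSize(Count) > Threshold) // largest count still under the threshold
        Count = (std::max(Threshold, BEInsns + 1) - BEInsns) / (LoopSize - BEInsns);
      while (Count != 0 && TripCount % Count != 0) // force Count to divide TripCount
        --Count;
      return Count < 2 ? 0 : Count; // a count below 2 means "do not partially unroll"
    }

    int main() {
      // A 12-instruction body with 2 backedge instructions, trip count 10,
      // threshold 60: the threshold allows 5 copies, and 5 divides 10, so 5.
      std::printf("%u\n", pickPartialUnrollCount(12, 10, 60, 2));
    }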
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp
index b0fb8daaba8f..c354fa177a60 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -494,7 +494,7 @@ static bool LinearizeExprTree(Instruction *I,
SmallVector<Value *, 8> LeafOrder; // Ensure deterministic leaf output order.
#ifndef NDEBUG
- SmallPtrSet<Value *, 8> Visited; // For sanity checking the iteration scheme.
+ SmallPtrSet<Value *, 8> Visited; // For checking the iteration scheme.
#endif
while (!Worklist.empty()) {
std::pair<Instruction*, APInt> P = Worklist.pop_back_val();
@@ -2313,11 +2313,8 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
MadeChange |= LinearizeExprTree(I, Tree);
SmallVector<ValueEntry, 8> Ops;
Ops.reserve(Tree.size());
- for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
- RepeatedValue E = Tree[i];
- Ops.append(E.second.getZExtValue(),
- ValueEntry(getRank(E.first), E.first));
- }
+ for (const RepeatedValue &E : Tree)
+ Ops.append(E.second.getZExtValue(), ValueEntry(getRank(E.first), E.first));
LLVM_DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n');
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 86d3620c312e..3799d2dd1cf2 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -227,8 +227,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
unsigned IterCnt = 0;
(void)IterCnt;
while (LocalChange) {
- assert(IterCnt++ < 1000 &&
- "Sanity: iterative simplification didn't converge!");
+ assert(IterCnt++ < 1000 && "Iterative simplification didn't converge!");
LocalChange = false;
// Loop over all of the basic blocks and remove them if they are unneeded.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 6469c899feea..d6d6b1a7fa09 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -235,22 +235,26 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
// These dominator edges will be redirected from Pred.
std::vector<DominatorTree::UpdateType> Updates;
if (DTU) {
- SmallPtrSet<BasicBlock *, 2> SuccsOfBB(succ_begin(BB), succ_end(BB));
+ // To avoid processing the same successor more than once.
+ SmallPtrSet<BasicBlock *, 8> SeenSuccs;
SmallPtrSet<BasicBlock *, 2> SuccsOfPredBB(succ_begin(PredBB),
succ_end(PredBB));
- Updates.reserve(Updates.size() + 2 * SuccsOfBB.size() + 1);
+ Updates.reserve(Updates.size() + 2 * succ_size(BB) + 1);
// Add insert edges first. Experimentally, for the particular case of two
// blocks that can be merged, with a single successor and single predecessor
// respectively, it is beneficial to have all insert updates first. Deleting
// edges first may lead to unreachable blocks, followed by inserting edges
// making the blocks reachable again. Such DT updates lead to high compile
// times. We add inserts before deletes here to reduce compile time.
- for (BasicBlock *SuccOfBB : SuccsOfBB)
+ for (BasicBlock *SuccOfBB : successors(BB))
// This successor of BB may already be a PredBB's successor.
if (!SuccsOfPredBB.contains(SuccOfBB))
- Updates.push_back({DominatorTree::Insert, PredBB, SuccOfBB});
- for (BasicBlock *SuccOfBB : SuccsOfBB)
- Updates.push_back({DominatorTree::Delete, BB, SuccOfBB});
+ if (SeenSuccs.insert(SuccOfBB).second)
+ Updates.push_back({DominatorTree::Insert, PredBB, SuccOfBB});
+ SeenSuccs.clear();
+ for (BasicBlock *SuccOfBB : successors(BB))
+ if (SeenSuccs.insert(SuccOfBB).second)
+ Updates.push_back({DominatorTree::Delete, BB, SuccOfBB});
Updates.push_back({DominatorTree::Delete, PredBB, BB});
}
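[Editorial aside, not part of the patch: the comment above explains the ordering constraint, namely that deleting edges first can leave blocks transiently unreachable and make incremental dominator-tree updates very expensive, so all inserts are queued before any deletes and duplicate successors are filtered with a set. A condensed sketch of that idiom, assuming BB, PredBB and DTU are in scope as in the function above and omitting the SuccsOfPredBB filter for brevity:]

    SmallVector<DominatorTree::UpdateType, 8> Updates;
    SmallPtrSet<BasicBlock *, 8> Seen;
    for (BasicBlock *Succ : successors(BB))   // a terminator may list a successor twice
      if (Seen.insert(Succ).second)           // .second is true only on first insertion
        Updates.push_back({DominatorTree::Insert, PredBB, Succ});
    Seen.clear();
    for (BasicBlock *Succ : successors(BB))
      if (Seen.insert(Succ).second)
        Updates.push_back({DominatorTree::Delete, BB, Succ});
    DTU->applyUpdates(Updates);               // one batched update instead of per-edge calls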
@@ -804,14 +808,14 @@ static BasicBlock *SplitBlockImpl(BasicBlock *Old, Instruction *SplitPt,
if (DTU) {
SmallVector<DominatorTree::UpdateType, 8> Updates;
// Old dominates New. New node dominates all other nodes dominated by Old.
- SmallPtrSet<BasicBlock *, 8> UniqueSuccessorsOfOld(succ_begin(New),
- succ_end(New));
+ SmallPtrSet<BasicBlock *, 8> UniqueSuccessorsOfOld;
Updates.push_back({DominatorTree::Insert, Old, New});
- Updates.reserve(Updates.size() + 2 * UniqueSuccessorsOfOld.size());
- for (BasicBlock *UniqueSuccessorOfOld : UniqueSuccessorsOfOld) {
- Updates.push_back({DominatorTree::Insert, New, UniqueSuccessorOfOld});
- Updates.push_back({DominatorTree::Delete, Old, UniqueSuccessorOfOld});
- }
+ Updates.reserve(Updates.size() + 2 * succ_size(New));
+ for (BasicBlock *SuccessorOfOld : successors(New))
+ if (UniqueSuccessorsOfOld.insert(SuccessorOfOld).second) {
+ Updates.push_back({DominatorTree::Insert, New, SuccessorOfOld});
+ Updates.push_back({DominatorTree::Delete, Old, SuccessorOfOld});
+ }
DTU->applyUpdates(Updates);
} else if (DT)
@@ -870,14 +874,14 @@ BasicBlock *llvm::splitBlockBefore(BasicBlock *Old, Instruction *SplitPt,
SmallVector<DominatorTree::UpdateType, 8> DTUpdates;
// New dominates Old. The predecessor nodes of the Old node dominate
// New node.
- SmallPtrSet<BasicBlock *, 8> UniquePredecessorsOfOld(pred_begin(New),
- pred_end(New));
+ SmallPtrSet<BasicBlock *, 8> UniquePredecessorsOfOld;
DTUpdates.push_back({DominatorTree::Insert, New, Old});
- DTUpdates.reserve(DTUpdates.size() + 2 * UniquePredecessorsOfOld.size());
- for (BasicBlock *UniquePredecessorOfOld : UniquePredecessorsOfOld) {
- DTUpdates.push_back({DominatorTree::Insert, UniquePredecessorOfOld, New});
- DTUpdates.push_back({DominatorTree::Delete, UniquePredecessorOfOld, Old});
- }
+ DTUpdates.reserve(DTUpdates.size() + 2 * pred_size(New));
+ for (BasicBlock *PredecessorOfOld : predecessors(New))
+ if (UniquePredecessorsOfOld.insert(PredecessorOfOld).second) {
+ DTUpdates.push_back({DominatorTree::Insert, PredecessorOfOld, New});
+ DTUpdates.push_back({DominatorTree::Delete, PredecessorOfOld, Old});
+ }
DTU->applyUpdates(DTUpdates);
@@ -910,13 +914,14 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
} else {
// Split block expects NewBB to have a non-empty set of predecessors.
SmallVector<DominatorTree::UpdateType, 8> Updates;
- SmallPtrSet<BasicBlock *, 8> UniquePreds(Preds.begin(), Preds.end());
+ SmallPtrSet<BasicBlock *, 8> UniquePreds;
Updates.push_back({DominatorTree::Insert, NewBB, OldBB});
- Updates.reserve(Updates.size() + 2 * UniquePreds.size());
- for (auto *UniquePred : UniquePreds) {
- Updates.push_back({DominatorTree::Insert, UniquePred, NewBB});
- Updates.push_back({DominatorTree::Delete, UniquePred, OldBB});
- }
+ Updates.reserve(Updates.size() + 2 * Preds.size());
+ for (auto *Pred : Preds)
+ if (UniquePreds.insert(Pred).second) {
+ Updates.push_back({DominatorTree::Insert, Pred, NewBB});
+ Updates.push_back({DominatorTree::Delete, Pred, OldBB});
+ }
DTU->applyUpdates(Updates);
}
} else if (DT) {
@@ -1376,14 +1381,14 @@ SplitBlockAndInsertIfThenImpl(Value *Cond, Instruction *SplitBefore,
BasicBlock *Head = SplitBefore->getParent();
BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
if (DTU) {
- SmallPtrSet<BasicBlock *, 8> UniqueSuccessorsOfHead(succ_begin(Tail),
- succ_end(Tail));
+ SmallPtrSet<BasicBlock *, 8> UniqueSuccessorsOfHead;
Updates.push_back({DominatorTree::Insert, Head, Tail});
- Updates.reserve(Updates.size() + 2 * UniqueSuccessorsOfHead.size());
- for (BasicBlock *UniqueSuccessorOfHead : UniqueSuccessorsOfHead) {
- Updates.push_back({DominatorTree::Insert, Tail, UniqueSuccessorOfHead});
- Updates.push_back({DominatorTree::Delete, Head, UniqueSuccessorOfHead});
- }
+ Updates.reserve(Updates.size() + 2 * succ_size(Tail));
+ for (BasicBlock *SuccessorOfHead : successors(Tail))
+ if (UniqueSuccessorsOfHead.insert(SuccessorOfHead).second) {
+ Updates.push_back({DominatorTree::Insert, Tail, SuccessorOfHead});
+ Updates.push_back({DominatorTree::Delete, Head, SuccessorOfHead});
+ }
}
Instruction *HeadOldTerm = Head->getTerminator();
LLVMContext &C = Head->getContext();
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index 957935398972..580cfd80141e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -452,18 +452,17 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
return Changed;
case LibFunc_mempcpy:
case LibFunc_memccpy:
+ Changed |= setWillReturn(F);
+ LLVM_FALLTHROUGH;
+ case LibFunc_memcpy_chk:
Changed |= setDoesNotThrow(F);
Changed |= setOnlyAccessesArgMemory(F);
- Changed |= setWillReturn(F);
Changed |= setDoesNotAlias(F, 0);
Changed |= setOnlyWritesMemory(F, 0);
Changed |= setDoesNotAlias(F, 1);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc_memcpy_chk:
- Changed |= setDoesNotThrow(F);
- return Changed;
case LibFunc_memalign:
Changed |= setOnlyAccessesInaccessibleMemory(F);
Changed |= setRetNoUndef(F);
@@ -1018,9 +1017,8 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- // TODO: add LibFunc entries for:
- // case LibFunc_memset_pattern4:
- // case LibFunc_memset_pattern8:
+ case LibFunc_memset_pattern4:
+ case LibFunc_memset_pattern8:
case LibFunc_memset_pattern16:
Changed |= setOnlyAccessesArgMemory(F);
Changed |= setDoesNotCapture(F, 0);
@@ -1029,10 +1027,12 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_memset:
- Changed |= setOnlyAccessesArgMemory(F);
Changed |= setWillReturn(F);
- Changed |= setDoesNotThrow(F);
+ LLVM_FALLTHROUGH;
+ case LibFunc_memset_chk:
+ Changed |= setOnlyAccessesArgMemory(F);
Changed |= setOnlyWritesMemory(F, 0);
+ Changed |= setDoesNotThrow(F);
return Changed;
// int __nvvm_reflect(const char *)
case LibFunc_nvvm_reflect:
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp
index 200deca4b317..57c273a0e3c5 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/CloneModule.cpp
@@ -135,10 +135,18 @@ std::unique_ptr<Module> llvm::CloneModule(
// Similarly, copy over function bodies now...
//
for (const Function &I : M) {
- if (I.isDeclaration())
+ Function *F = cast<Function>(VMap[&I]);
+
+ if (I.isDeclaration()) {
+ // Copy over metadata for declarations since we're not doing it below in
+ // CloneFunctionInto().
+ SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
+ I.getAllMetadata(MDs);
+ for (auto MD : MDs)
+ F->addMetadata(MD.first, *MapMetadata(MD.second, VMap));
continue;
+ }
- Function *F = cast<Function>(VMap[&I]);
if (!ShouldCloneDefinition(&I)) {
// Skip after setting the correct linkage for an external reference.
F->setLinkage(GlobalValue::ExternalLinkage);
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/GuardUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/GuardUtils.cpp
index 4dbcbf80d3da..7c310f16d46e 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/GuardUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/GuardUtils.cpp
@@ -74,7 +74,7 @@ void llvm::makeGuardControlFlowExplicit(Function *DeoptIntrinsic,
{}, {}, nullptr, "widenable_cond");
CheckBI->setCondition(B.CreateAnd(CheckBI->getCondition(), WC,
"exiplicit_guard_cond"));
- assert(isWidenableBranch(CheckBI) && "sanity check");
+ assert(isWidenableBranch(CheckBI) && "Branch must be widenable.");
}
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp
index f4776589910f..997667810580 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1218,10 +1218,9 @@ static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) {
if (!RI || !isa<CallBase>(RI->getOperand(0)))
continue;
auto *RetVal = cast<CallBase>(RI->getOperand(0));
- // Sanity check that the cloned RetVal exists and is a call, otherwise we
- // cannot add the attributes on the cloned RetVal.
- // Simplification during inlining could have transformed the cloned
- // instruction.
+ // Check that the cloned RetVal exists and is a call, otherwise we cannot
+ // add the attributes on the cloned RetVal. Simplification during inlining
+ // could have transformed the cloned instruction.
auto *NewRetVal = dyn_cast_or_null<CallBase>(VMap.lookup(RetVal));
if (!NewRetVal)
continue;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
index 74ab37fadf36..ec926b1f5a94 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp
@@ -529,8 +529,8 @@ bool llvm::RecursivelyDeleteTriviallyDeadInstructionsPermissive(
std::function<void(Value *)> AboutToDeleteCallback) {
unsigned S = 0, E = DeadInsts.size(), Alive = 0;
for (; S != E; ++S) {
- auto *I = cast<Instruction>(DeadInsts[S]);
- if (!isInstructionTriviallyDead(I)) {
+ auto *I = dyn_cast<Instruction>(DeadInsts[S]);
+ if (!I || !isInstructionTriviallyDead(I)) {
DeadInsts[S] = nullptr;
++Alive;
}
@@ -760,15 +760,18 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB,
SmallVector<DominatorTree::UpdateType, 32> Updates;
if (DTU) {
- SmallPtrSet<BasicBlock *, 2> PredsOfPredBB(pred_begin(PredBB),
- pred_end(PredBB));
- Updates.reserve(Updates.size() + 2 * PredsOfPredBB.size() + 1);
- for (BasicBlock *PredOfPredBB : PredsOfPredBB)
+ // To avoid processing the same predecessor more than once.
+ SmallPtrSet<BasicBlock *, 2> SeenPreds;
+ Updates.reserve(Updates.size() + 2 * pred_size(PredBB) + 1);
+ for (BasicBlock *PredOfPredBB : predecessors(PredBB))
// This predecessor of PredBB may already have DestBB as a successor.
if (PredOfPredBB != PredBB)
- Updates.push_back({DominatorTree::Insert, PredOfPredBB, DestBB});
- for (BasicBlock *PredOfPredBB : PredsOfPredBB)
- Updates.push_back({DominatorTree::Delete, PredOfPredBB, PredBB});
+ if (SeenPreds.insert(PredOfPredBB).second)
+ Updates.push_back({DominatorTree::Insert, PredOfPredBB, DestBB});
+ SeenPreds.clear();
+ for (BasicBlock *PredOfPredBB : predecessors(PredBB))
+ if (SeenPreds.insert(PredOfPredBB).second)
+ Updates.push_back({DominatorTree::Delete, PredOfPredBB, PredBB});
Updates.push_back({DominatorTree::Delete, PredBB, DestBB});
}
@@ -1096,16 +1099,20 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
SmallVector<DominatorTree::UpdateType, 32> Updates;
if (DTU) {
+ // To avoid processing the same predecessor more than once.
+ SmallPtrSet<BasicBlock *, 8> SeenPreds;
// All predecessors of BB will be moved to Succ.
- SmallPtrSet<BasicBlock *, 8> PredsOfBB(pred_begin(BB), pred_end(BB));
SmallPtrSet<BasicBlock *, 8> PredsOfSucc(pred_begin(Succ), pred_end(Succ));
- Updates.reserve(Updates.size() + 2 * PredsOfBB.size() + 1);
- for (auto *PredOfBB : PredsOfBB)
+ Updates.reserve(Updates.size() + 2 * pred_size(BB) + 1);
+ for (auto *PredOfBB : predecessors(BB))
// This predecessor of BB may already have Succ as a successor.
if (!PredsOfSucc.contains(PredOfBB))
- Updates.push_back({DominatorTree::Insert, PredOfBB, Succ});
- for (auto *PredOfBB : PredsOfBB)
- Updates.push_back({DominatorTree::Delete, PredOfBB, BB});
+ if (SeenPreds.insert(PredOfBB).second)
+ Updates.push_back({DominatorTree::Insert, PredOfBB, Succ});
+ SeenPreds.clear();
+ for (auto *PredOfBB : predecessors(BB))
+ if (SeenPreds.insert(PredOfBB).second)
+ Updates.push_back({DominatorTree::Delete, PredOfBB, BB});
Updates.push_back({DominatorTree::Delete, BB, Succ});
}
@@ -2190,26 +2197,6 @@ void llvm::changeToCall(InvokeInst *II, DomTreeUpdater *DTU) {
DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDestBB}});
}
-void llvm::createUnreachableSwitchDefault(SwitchInst *Switch,
- DomTreeUpdater *DTU) {
- LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
- auto *BB = Switch->getParent();
- auto *OrigDefaultBlock = Switch->getDefaultDest();
- OrigDefaultBlock->removePredecessor(BB);
- BasicBlock *NewDefaultBlock = BasicBlock::Create(
- BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
- OrigDefaultBlock);
- new UnreachableInst(Switch->getContext(), NewDefaultBlock);
- Switch->setDefaultDest(&*NewDefaultBlock);
- if (DTU) {
- SmallVector<DominatorTree::UpdateType, 2> Updates;
- Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
- if (!is_contained(successors(BB), OrigDefaultBlock))
- Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
- DTU->applyUpdates(Updates);
- }
-}
-
BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
BasicBlock *UnwindEdge,
DomTreeUpdater *DTU) {
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index a92cb6a313d3..bb719a499a4c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -623,15 +623,13 @@ bool llvm::UnrollRuntimeLoopRemainder(
if (!SE)
return false;
- // Only unroll loops with a computable trip count, and the trip count needs
- // to be an int value (allowing a pointer type is a TODO item).
+ // Only unroll loops with a computable trip count.
// We calculate the backedge count by using getExitCount on the Latch block,
// which is proven to be the only exiting block in this loop. This is same as
// calculating getBackedgeTakenCount on the loop (which computes SCEV for all
// exiting blocks).
const SCEV *BECountSC = SE->getExitCount(L, Latch);
- if (isa<SCEVCouldNotCompute>(BECountSC) ||
- !BECountSC->getType()->isIntegerTy()) {
+ if (isa<SCEVCouldNotCompute>(BECountSC)) {
LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n");
return false;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 68572d479742..c8e42acdffb3 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1049,6 +1049,7 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder,
return Builder.CreateOrReduce(Src);
case RecurKind::Xor:
return Builder.CreateXorReduce(Src);
+ case RecurKind::FMulAdd:
case RecurKind::FAdd:
return Builder.CreateFAddReduce(ConstantFP::getNegativeZero(SrcVecEltTy),
Src);
@@ -1091,7 +1092,8 @@ Value *llvm::createTargetReduction(IRBuilderBase &B,
Value *llvm::createOrderedReduction(IRBuilderBase &B,
const RecurrenceDescriptor &Desc,
Value *Src, Value *Start) {
- assert(Desc.getRecurrenceKind() == RecurKind::FAdd &&
+ assert((Desc.getRecurrenceKind() == RecurKind::FAdd ||
+ Desc.getRecurrenceKind() == RecurKind::FMulAdd) &&
"Unexpected reduction kind");
assert(Src->getType()->isVectorTy() && "Expected a vector type");
assert(!Start->getType()->isVectorTy() && "Expected a scalar type");
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp
index 5893ce15b129..7d9992176658 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SSAUpdater.cpp
@@ -446,6 +446,9 @@ void LoadAndStorePromoter::run(const SmallVectorImpl<Instruction *> &Insts) {
// Now that everything is rewritten, delete the old instructions from the
// function. They should all be dead now.
for (Instruction *User : Insts) {
+ if (!shouldDelete(User))
+ continue;
+
// If this is a load that still has uses, then the load must have been added
// as a live value in the SSAUpdate data structure for a block (e.g. because
// the loaded value was stored later). In this case, we need to recursively
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
new file mode 100644
index 000000000000..9495e442e0bf
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
@@ -0,0 +1,462 @@
+//===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a profile inference algorithm. Given incomplete and
+// possibly imprecise block counts, the algorithm reconstructs realistic block
+// and edge counts that satisfy flow conservation rules, while minimally
+// modifying the input block counts.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SampleProfileInference.h"
+#include "llvm/Support/Debug.h"
+#include <queue>
+#include <set>
+
+using namespace llvm;
+#define DEBUG_TYPE "sample-profile-inference"
+
+namespace {
+
+/// A value indicating an infinite flow/capacity/weight of a block/edge.
+/// Not using numeric_limits<int64_t>::max(), as the values can be summed up
+/// during the execution.
+static constexpr int64_t INF = ((int64_t)1) << 50;
+
+/// The minimum-cost maximum flow algorithm.
+///
+/// The algorithm finds the maximum flow of minimum cost on a given (directed)
+/// network using a modified version of the classical Moore-Bellman-Ford
+/// approach. The algorithm applies a number of augmentation iterations in which
+/// flow is sent along paths of positive capacity from the source to the sink.
+/// The worst-case time complexity of the implementation is O(v(f)*m*n), where
+/// m is the number of edges, n is the number of vertices, and v(f) is the
+/// value of the maximum flow. However, the observed running time on typical
+/// instances is sub-quadratic, that is, o(n^2).
+///
+/// The input is a set of edges with specified costs and capacities, and a pair
+/// of nodes (source and sink). The output is the flow along each edge of the
+/// minimum total cost respecting the given edge capacities.
+class MinCostMaxFlow {
+public:
+ // Initialize algorithm's data structures for a network of a given size.
+ void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
+ Source = SourceNode;
+ Target = SinkNode;
+
+ Nodes = std::vector<Node>(NodeCount);
+ Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
+ }
+
+ // Run the algorithm.
+ int64_t run() {
+ // Find an augmenting path and update the flow along the path
+ size_t AugmentationIters = 0;
+ while (findAugmentingPath()) {
+ augmentFlowAlongPath();
+ AugmentationIters++;
+ }
+
+ // Compute the total flow and its cost
+ int64_t TotalCost = 0;
+ int64_t TotalFlow = 0;
+ for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
+ for (auto &Edge : Edges[Src]) {
+ if (Edge.Flow > 0) {
+ TotalCost += Edge.Cost * Edge.Flow;
+ if (Src == Source)
+ TotalFlow += Edge.Flow;
+ }
+ }
+ }
+ LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
+ << " iterations with " << TotalFlow << " total flow"
+ << " of " << TotalCost << " cost\n");
+ (void)TotalFlow;
+ return TotalCost;
+ }
+
+ /// Add an edge to the network with a specified capacity and cost.
+ /// Multiple edges between a pair of nodes are allowed but self-edges
+ /// are not supported.
+ void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
+ assert(Capacity > 0 && "adding an edge of zero capacity");
+ assert(Src != Dst && "loop edges are not supported");
+
+ Edge SrcEdge;
+ SrcEdge.Dst = Dst;
+ SrcEdge.Cost = Cost;
+ SrcEdge.Capacity = Capacity;
+ SrcEdge.Flow = 0;
+ SrcEdge.RevEdgeIndex = Edges[Dst].size();
+
+ Edge DstEdge;
+ DstEdge.Dst = Src;
+ DstEdge.Cost = -Cost;
+ DstEdge.Capacity = 0;
+ DstEdge.Flow = 0;
+ DstEdge.RevEdgeIndex = Edges[Src].size();
+
+ Edges[Src].push_back(SrcEdge);
+ Edges[Dst].push_back(DstEdge);
+ }
+
+ /// Add an edge of infinite capacity and a given cost to the network.
+ void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
+ addEdge(Src, Dst, INF, Cost);
+ }
+
+ /// Get the total flow from a given source node.
+ /// Returns a list of pairs (target node, amount of flow to the target).
+ const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
+ std::vector<std::pair<uint64_t, int64_t>> Flow;
+ for (auto &Edge : Edges[Src]) {
+ if (Edge.Flow > 0)
+ Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
+ }
+ return Flow;
+ }
+
+ /// Get the total flow between a pair of nodes.
+ int64_t getFlow(uint64_t Src, uint64_t Dst) const {
+ int64_t Flow = 0;
+ for (auto &Edge : Edges[Src]) {
+ if (Edge.Dst == Dst) {
+ Flow += Edge.Flow;
+ }
+ }
+ return Flow;
+ }
+
+ /// A cost of increasing a block's count by one.
+ static constexpr int64_t AuxCostInc = 10;
+ /// A cost of decreasing a block's count by one.
+ static constexpr int64_t AuxCostDec = 20;
+ /// A cost of increasing a count of zero-weight block by one.
+ static constexpr int64_t AuxCostIncZero = 11;
+ /// A cost of increasing the entry block's count by one.
+ static constexpr int64_t AuxCostIncEntry = 40;
+ /// A cost of decreasing the entry block's count by one.
+ static constexpr int64_t AuxCostDecEntry = 10;
+ /// A cost of taking an unlikely jump.
+ static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 20;
+
+private:
+ /// Check for existence of an augmenting path with a positive capacity.
+ bool findAugmentingPath() {
+ // Initialize data structures
+ for (auto &Node : Nodes) {
+ Node.Distance = INF;
+ Node.ParentNode = uint64_t(-1);
+ Node.ParentEdgeIndex = uint64_t(-1);
+ Node.Taken = false;
+ }
+
+ std::queue<uint64_t> Queue;
+ Queue.push(Source);
+ Nodes[Source].Distance = 0;
+ Nodes[Source].Taken = true;
+ while (!Queue.empty()) {
+ uint64_t Src = Queue.front();
+ Queue.pop();
+ Nodes[Src].Taken = false;
+ // Although the residual network contains edges with negative costs
+ // (in particular, backward edges), it can be shown that there are no
+ // negative-weight cycles and the following two invariants are maintained:
+ // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
+ // where Dist is the length of the shortest path between two nodes. This
+ // allows us to prune the search-space of the path-finding algorithm using
+ // the following early-stop criteria:
+ // -- If we find a path with zero-distance from Source to Target, stop the
+ // search, as the path is the shortest since Dist[Source, Target] >= 0;
+ // -- If we have Dist[Source, V] > Dist[Source, Target], then do not
+ // process node V, as it is guaranteed _not_ to be on a shortest path
+ // from Source to Target; it follows from inequalities
+ // Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
+ // >= Dist[Source, V]
+ if (Nodes[Target].Distance == 0)
+ break;
+ if (Nodes[Src].Distance > Nodes[Target].Distance)
+ continue;
+
+ // Process adjacent edges
+ for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
+ auto &Edge = Edges[Src][EdgeIdx];
+ if (Edge.Flow < Edge.Capacity) {
+ uint64_t Dst = Edge.Dst;
+ int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
+ if (Nodes[Dst].Distance > NewDistance) {
+ // Update the distance and the parent node/edge
+ Nodes[Dst].Distance = NewDistance;
+ Nodes[Dst].ParentNode = Src;
+ Nodes[Dst].ParentEdgeIndex = EdgeIdx;
+ // Add the node to the queue, if it is not there yet
+ if (!Nodes[Dst].Taken) {
+ Queue.push(Dst);
+ Nodes[Dst].Taken = true;
+ }
+ }
+ }
+ }
+ }
+
+ return Nodes[Target].Distance != INF;
+ }
+
+ /// Update the current flow along the augmenting path.
+ void augmentFlowAlongPath() {
+ // Find path capacity
+ int64_t PathCapacity = INF;
+ uint64_t Now = Target;
+ while (Now != Source) {
+ uint64_t Pred = Nodes[Now].ParentNode;
+ auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
+ PathCapacity = std::min(PathCapacity, Edge.Capacity - Edge.Flow);
+ Now = Pred;
+ }
+
+ assert(PathCapacity > 0 && "found incorrect augmenting path");
+
+ // Update the flow along the path
+ Now = Target;
+ while (Now != Source) {
+ uint64_t Pred = Nodes[Now].ParentNode;
+ auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
+ auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];
+
+ Edge.Flow += PathCapacity;
+ RevEdge.Flow -= PathCapacity;
+
+ Now = Pred;
+ }
+ }
+
+ /// A node in a flow network.
+ struct Node {
+ /// The cost of the cheapest path from the source to the current node.
+ int64_t Distance;
+ /// The node preceding the current one in the path.
+ uint64_t ParentNode;
+ /// The index of the edge between ParentNode and the current node.
+ uint64_t ParentEdgeIndex;
+ /// An indicator of whether the current node is in a queue.
+ bool Taken;
+ };
+ /// An edge in a flow network.
+ struct Edge {
+ /// The cost of the edge.
+ int64_t Cost;
+ /// The capacity of the edge.
+ int64_t Capacity;
+ /// The current flow on the edge.
+ int64_t Flow;
+ /// The destination node of the edge.
+ uint64_t Dst;
+ /// The index of the reverse edge between Dst and the current node.
+ uint64_t RevEdgeIndex;
+ };
+
+ /// The set of network nodes.
+ std::vector<Node> Nodes;
+ /// The set of network edges.
+ std::vector<std::vector<Edge>> Edges;
+ /// Source node of the flow.
+ uint64_t Source;
+ /// Target (sink) node of the flow.
+ uint64_t Target;
+};
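[Editorial aside, not part of the patch: to make the interface above concrete, here is an illustrative use of the class on a four-node network. The edge values are invented for the example, and since MinCostMaxFlow lives in an anonymous namespace, such code could only sit inside this same file.]

    // Two disjoint source-to-sink paths: 0 -> 1 -> 3 (capacity 5, cost 1 per edge)
    // and 0 -> 2 -> 3 (capacity 3, cost 2 per edge).
    MinCostMaxFlow Network;
    Network.initialize(/*NodeCount=*/4, /*SourceNode=*/0, /*SinkNode=*/3);
    Network.addEdge(0, 1, /*Capacity=*/5, /*Cost=*/1);
    Network.addEdge(1, 3, /*Capacity=*/5, /*Cost=*/1);
    Network.addEdge(0, 2, /*Capacity=*/3, /*Cost=*/2);
    Network.addEdge(2, 3, /*Capacity=*/3, /*Cost=*/2);
    int64_t TotalCost = Network.run();           // max flow 8; cost 5*(1+1) + 3*(2+2) = 22
    int64_t TopPathFlow = Network.getFlow(0, 1); // 5 units along the cheaper path

The same three-call shape (initialize, addEdge, run) is what initializeNetwork and applyFlowInference below rely on.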
+
+/// Initialize the flow network for a given function.
+///
+/// Every block is split into three nodes that are responsible for (i) an
+/// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or
+/// reduction of the block weight.
+void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
+ uint64_t NumBlocks = Func.Blocks.size();
+ assert(NumBlocks > 1 && "Too few blocks in a function");
+ LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n");
+
+ // Pre-process data: make sure the entry weight is at least 1
+ if (Func.Blocks[Func.Entry].Weight == 0) {
+ Func.Blocks[Func.Entry].Weight = 1;
+ }
+ // Introducing dummy source/sink pairs to allow flow circulation.
+ // The nodes corresponding to blocks of Func have indices in the range
+ // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values.
+ uint64_t S = 3 * NumBlocks;
+ uint64_t T = S + 1;
+ uint64_t S1 = S + 2;
+ uint64_t T1 = S + 3;
+
+ Network.initialize(3 * NumBlocks + 4, S1, T1);
+
+ // Create three nodes for every block of the function
+ for (uint64_t B = 0; B < NumBlocks; B++) {
+ auto &Block = Func.Blocks[B];
+ assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
+ "non-zero weight of a block w/o weight except for an entry");
+
+ // Split every block into two nodes
+ uint64_t Bin = 3 * B;
+ uint64_t Bout = 3 * B + 1;
+ uint64_t Baux = 3 * B + 2;
+ if (Block.Weight > 0) {
+ Network.addEdge(S1, Bout, Block.Weight, 0);
+ Network.addEdge(Bin, T1, Block.Weight, 0);
+ }
+
+ // Edges from S and to T
+ assert((!Block.isEntry() || !Block.isExit()) &&
+ "a block cannot be an entry and an exit");
+ if (Block.isEntry()) {
+ Network.addEdge(S, Bin, 0);
+ } else if (Block.isExit()) {
+ Network.addEdge(Bout, T, 0);
+ }
+
+ // An auxiliary node to allow increase/reduction of block counts:
+ // We assume that decreasing block counts is more expensive than increasing,
+ // and thus set separate costs here. In the future we may want to tune
+ // the relative costs so as to maximize the quality of generated profiles.
+ int64_t AuxCostInc = MinCostMaxFlow::AuxCostInc;
+ int64_t AuxCostDec = MinCostMaxFlow::AuxCostDec;
+ if (Block.UnknownWeight) {
+ // Do not penalize changing weights of blocks w/o known profile count
+ AuxCostInc = 0;
+ AuxCostDec = 0;
+ } else {
+ // Increasing the count for "cold" blocks with zero initial count is more
+ // expensive than for "hot" ones
+ if (Block.Weight == 0) {
+ AuxCostInc = MinCostMaxFlow::AuxCostIncZero;
+ }
+ // Modifying the count of the entry block is expensive
+ if (Block.isEntry()) {
+ AuxCostInc = MinCostMaxFlow::AuxCostIncEntry;
+ AuxCostDec = MinCostMaxFlow::AuxCostDecEntry;
+ }
+ }
+ // For blocks with self-edges, do not penalize a reduction of the count,
+ // as all of the increase can be attributed to the self-edge
+ if (Block.HasSelfEdge) {
+ AuxCostDec = 0;
+ }
+
+ Network.addEdge(Bin, Baux, AuxCostInc);
+ Network.addEdge(Baux, Bout, AuxCostInc);
+ if (Block.Weight > 0) {
+ Network.addEdge(Bout, Baux, AuxCostDec);
+ Network.addEdge(Baux, Bin, AuxCostDec);
+ }
+ }
+
+ // Creating edges for every jump
+ for (auto &Jump : Func.Jumps) {
+ uint64_t Src = Jump.Source;
+ uint64_t Dst = Jump.Target;
+ if (Src != Dst) {
+ uint64_t SrcOut = 3 * Src + 1;
+ uint64_t DstIn = 3 * Dst;
+ uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0;
+ Network.addEdge(SrcOut, DstIn, Cost);
+ }
+ }
+
+ // Make sure we have a valid flow circulation
+ Network.addEdge(T, S, 0);
+}
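[Editorial aside, not part of the patch: as a concrete instance of the numbering above, for a function with NumBlocks = 3, block B0 maps to nodes 0/1/2 (Bin/Bout/Baux), B1 to 3/4/5, and B2 to 6/7/8; the dummy nodes are S = 9, T = 10, S1 = 11 and T1 = 12, so the network is initialized with 3 * 3 + 4 = 13 nodes and S1/T1 as source and sink, and the final addEdge(T, S, 0) closes the circulation between the dummy terminals.]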
+
+/// Extract resulting block and edge counts from the flow network.
+void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) {
+ uint64_t NumBlocks = Func.Blocks.size();
+
+ // Extract resulting block counts
+ for (uint64_t Src = 0; Src < NumBlocks; Src++) {
+ auto &Block = Func.Blocks[Src];
+ uint64_t SrcOut = 3 * Src + 1;
+ int64_t Flow = 0;
+ for (auto &Adj : Network.getFlow(SrcOut)) {
+ uint64_t DstIn = Adj.first;
+ int64_t DstFlow = Adj.second;
+ bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2);
+ if (!IsAuxNode || Block.HasSelfEdge) {
+ Flow += DstFlow;
+ }
+ }
+ Block.Flow = Flow;
+ assert(Flow >= 0 && "negative block flow");
+ }
+
+ // Extract resulting jump counts
+ for (auto &Jump : Func.Jumps) {
+ uint64_t Src = Jump.Source;
+ uint64_t Dst = Jump.Target;
+ int64_t Flow = 0;
+ if (Src != Dst) {
+ uint64_t SrcOut = 3 * Src + 1;
+ uint64_t DstIn = 3 * Dst;
+ Flow = Network.getFlow(SrcOut, DstIn);
+ } else {
+ uint64_t SrcOut = 3 * Src + 1;
+ uint64_t SrcAux = 3 * Src + 2;
+ int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux);
+ if (AuxFlow > 0)
+ Flow = AuxFlow;
+ }
+ Jump.Flow = Flow;
+ assert(Flow >= 0 && "negative jump flow");
+ }
+}
+
+#ifndef NDEBUG
+/// Verify that the computed flow values satisfy flow conservation rules
+void verifyWeights(const FlowFunction &Func) {
+ const uint64_t NumBlocks = Func.Blocks.size();
+ auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
+ auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
+ for (auto &Jump : Func.Jumps) {
+ InFlow[Jump.Target] += Jump.Flow;
+ OutFlow[Jump.Source] += Jump.Flow;
+ }
+
+ uint64_t TotalInFlow = 0;
+ uint64_t TotalOutFlow = 0;
+ for (uint64_t I = 0; I < NumBlocks; I++) {
+ auto &Block = Func.Blocks[I];
+ if (Block.isEntry()) {
+ TotalInFlow += Block.Flow;
+ assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
+ } else if (Block.isExit()) {
+ TotalOutFlow += Block.Flow;
+ assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
+ } else {
+ assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
+ assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
+ }
+ }
+ assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");
+}
+#endif
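[Editorial aside, not part of the patch: a small worked example of the conservation rules checked above. For a diamond CFG A -> {B, C} -> D with jump flows 6 on A->B and B->D, and 4 on A->C and C->D, verifyWeights requires Block.Flow to be 6 for B, 4 for C, and 10 for both the entry A and the exit D, giving TotalInFlow == TotalOutFlow == 10.]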
+
+} // end of anonymous namespace
+
+/// Apply the profile inference algorithm for a given flow function
+void llvm::applyFlowInference(FlowFunction &Func) {
+ // Create and apply an inference network model
+ auto InferenceNetwork = MinCostMaxFlow();
+ initializeNetwork(InferenceNetwork, Func);
+ InferenceNetwork.run();
+
+ // Extract flow values for every block and every edge
+ extractWeights(InferenceNetwork, Func);
+
+#ifndef NDEBUG
+ // Verify the result
+ verifyWeights(Func);
+#endif
+}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp
index 6d995cf4c048..ea0e8343eb88 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp
@@ -34,6 +34,10 @@ cl::opt<bool> NoWarnSampleUnused(
cl::desc("Use this option to turn off/on warnings about function with "
"samples but without debug information to use those samples. "));
+cl::opt<bool> SampleProfileUseProfi(
+ "sample-profile-use-profi", cl::init(false), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Use profi to infer block and edge counts."));
+
namespace sampleprofutil {
/// Return true if the given callsite is hot wrt to hot cutoff threshold.
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index a042146d7ace..71c15d5c51fc 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -18,6 +18,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -1833,22 +1834,6 @@ Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty, bool Root) {
return V;
}
-/// Check whether value has nuw/nsw/exact set but SCEV does not.
-/// TODO: In reality it is better to check the poison recursively
-/// but this is better than nothing.
-static bool SCEVLostPoisonFlags(const SCEV *S, const Instruction *I) {
- if (isa<OverflowingBinaryOperator>(I)) {
- if (auto *NS = dyn_cast<SCEVNAryExpr>(S)) {
- if (I->hasNoSignedWrap() && !NS->hasNoSignedWrap())
- return true;
- if (I->hasNoUnsignedWrap() && !NS->hasNoUnsignedWrap())
- return true;
- }
- } else if (isa<PossiblyExactOperator>(I) && I->isExact())
- return true;
- return false;
-}
-
ScalarEvolution::ValueOffsetPair
SCEVExpander::FindValueInExprValueMap(const SCEV *S,
const Instruction *InsertPt) {
@@ -1872,8 +1857,7 @@ SCEVExpander::FindValueInExprValueMap(const SCEV *S,
if (S->getType() == V->getType() &&
SE.DT.dominates(EntInst, InsertPt) &&
(SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
- SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)) &&
- !SCEVLostPoisonFlags(S, EntInst))
+ SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)))
return {V, Offset};
}
}
@@ -1952,26 +1936,36 @@ Value *SCEVExpander::expand(const SCEV *S) {
if (!V)
V = visit(S);
- else if (VO.second) {
- if (PointerType *Vty = dyn_cast<PointerType>(V->getType())) {
- Type *Ety = Vty->getPointerElementType();
- int64_t Offset = VO.second->getSExtValue();
- int64_t ESize = SE.getTypeSizeInBits(Ety);
- if ((Offset * 8) % ESize == 0) {
- ConstantInt *Idx =
+ else {
+ // If we're reusing an existing instruction, we are effectively CSEing two
+ // copies of the instruction (with potentially different flags). As such,
+ // we need to drop any poison generating flags unless we can prove that
+ // said flags must be valid for all new users.
+ if (auto *I = dyn_cast<Instruction>(V))
+ if (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I))
+ I->dropPoisonGeneratingFlags();
+
+ if (VO.second) {
+ if (PointerType *Vty = dyn_cast<PointerType>(V->getType())) {
+ Type *Ety = Vty->getPointerElementType();
+ int64_t Offset = VO.second->getSExtValue();
+ int64_t ESize = SE.getTypeSizeInBits(Ety);
+ if ((Offset * 8) % ESize == 0) {
+ ConstantInt *Idx =
ConstantInt::getSigned(VO.second->getType(), -(Offset * 8) / ESize);
- V = Builder.CreateGEP(Ety, V, Idx, "scevgep");
- } else {
- ConstantInt *Idx =
+ V = Builder.CreateGEP(Ety, V, Idx, "scevgep");
+ } else {
+ ConstantInt *Idx =
ConstantInt::getSigned(VO.second->getType(), -Offset);
- unsigned AS = Vty->getAddressSpace();
- V = Builder.CreateBitCast(V, Type::getInt8PtrTy(SE.getContext(), AS));
- V = Builder.CreateGEP(Type::getInt8Ty(SE.getContext()), V, Idx,
- "uglygep");
- V = Builder.CreateBitCast(V, Vty);
+ unsigned AS = Vty->getAddressSpace();
+ V = Builder.CreateBitCast(V, Type::getInt8PtrTy(SE.getContext(), AS));
+ V = Builder.CreateGEP(Type::getInt8Ty(SE.getContext()), V, Idx,
+ "uglygep");
+ V = Builder.CreateBitCast(V, Vty);
+ }
+ } else {
+ V = Builder.CreateSub(V, VO.second);
}
- } else {
- V = Builder.CreateSub(V, VO.second);
}
}
// Remember the expanded value for this SCEV at this location.
@@ -2180,7 +2174,9 @@ SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At,
}
// Use expand's logic which is used for reusing a previous Value in
- // ExprValueMap.
+ // ExprValueMap. Note that we don't currently model the cost of
+ // needing to drop poison generating flags on the instruction if we
+ // want to reuse it. We effectively assume that has zero cost.
ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, At);
if (VO.first)
return VO;
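[Editorial aside, not part of the patch: a concrete illustration of the flag-dropping rule introduced in expand() above. If the SCEV for a + b is cached to an existing "add nuw" instruction, reusing that instruction for a second user effectively CSEs a flagged and an unflagged copy of the same computation; unless programUndefinedIfPoison shows that poison at that add already makes the program undefined, the nuw flag is stripped so the reused value cannot introduce poison for the new user when the addition wraps.]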
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index f467de5f924e..afa3ecde77f9 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3936,7 +3936,7 @@ bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm,
BasicBlock *KeepEdge1 = TrueBB;
BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
- SmallPtrSet<BasicBlock *, 2> RemovedSuccessors;
+ SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
// Then remove the rest.
for (BasicBlock *Succ : successors(OldTerm)) {
@@ -4782,6 +4782,26 @@ static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) {
return true;
}
+static void createUnreachableSwitchDefault(SwitchInst *Switch,
+ DomTreeUpdater *DTU) {
+ LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
+ auto *BB = Switch->getParent();
+ auto *OrigDefaultBlock = Switch->getDefaultDest();
+ OrigDefaultBlock->removePredecessor(BB);
+ BasicBlock *NewDefaultBlock = BasicBlock::Create(
+ BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
+ OrigDefaultBlock);
+ new UnreachableInst(Switch->getContext(), NewDefaultBlock);
+ Switch->setDefaultDest(&*NewDefaultBlock);
+ if (DTU) {
+ SmallVector<DominatorTree::UpdateType, 2> Updates;
+ Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
+ if (!is_contained(successors(BB), OrigDefaultBlock))
+ Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
+ DTU->applyUpdates(Updates);
+ }
+}
+
/// Turn a switch with two reachable destinations into an integer range
/// comparison and branch.
bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI,
@@ -4927,10 +4947,14 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU,
// Gather dead cases.
SmallVector<ConstantInt *, 8> DeadCases;
SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
+ SmallVector<BasicBlock *, 8> UniqueSuccessors;
for (auto &Case : SI->cases()) {
auto *Successor = Case.getCaseSuccessor();
- if (DTU)
+ if (DTU) {
+ if (!NumPerSuccessorCases.count(Successor))
+ UniqueSuccessors.push_back(Successor);
++NumPerSuccessorCases[Successor];
+ }
const APInt &CaseVal = Case.getCaseValue()->getValue();
if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
(CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) {
@@ -4973,9 +4997,9 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU,
if (DTU) {
std::vector<DominatorTree::UpdateType> Updates;
- for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
- if (I.second == 0)
- Updates.push_back({DominatorTree::Delete, SI->getParent(), I.first});
+ for (auto *Successor : UniqueSuccessors)
+ if (NumPerSuccessorCases[Successor] == 0)
+ Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
DTU->applyUpdates(Updates);
}
@@ -6040,15 +6064,13 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
if (Succ == SI->getDefaultDest())
continue;
Succ->removePredecessor(BB);
- RemovedSuccessors.insert(Succ);
+ if (DTU && RemovedSuccessors.insert(Succ).second)
+ Updates.push_back({DominatorTree::Delete, BB, Succ});
}
SI->eraseFromParent();
- if (DTU) {
- for (BasicBlock *RemovedSuccessor : RemovedSuccessors)
- Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
+ if (DTU)
DTU->applyUpdates(Updates);
- }
++NumLookupTables;
if (NeedMask)
@@ -6215,7 +6237,7 @@ bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
// Eliminate redundant destinations.
SmallPtrSet<Value *, 8> Succs;
- SmallPtrSet<BasicBlock *, 8> RemovedSuccs;
+ SmallSetVector<BasicBlock *, 8> RemovedSuccs;
for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
BasicBlock *Dest = IBI->getDestination(i);
if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
@@ -6305,8 +6327,8 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
// We've found an identical block. Update our predecessors to take that
// path instead and make ourselves dead.
- SmallPtrSet<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
- for (BasicBlock *Pred : Preds) {
+ SmallSetVector<BasicBlock *, 16> UniquePreds(pred_begin(BB), pred_end(BB));
+ for (BasicBlock *Pred : UniquePreds) {
InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
"unexpected successor");
@@ -6323,8 +6345,8 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
if (isa<DbgInfoIntrinsic>(Inst))
Inst.eraseFromParent();
- SmallPtrSet<BasicBlock *, 16> Succs(succ_begin(BB), succ_end(BB));
- for (BasicBlock *Succ : Succs) {
+ SmallSetVector<BasicBlock *, 16> UniqueSuccs(succ_begin(BB), succ_end(BB));
+ for (BasicBlock *Succ : UniqueSuccs) {
Succ->removePredecessor(BB);
if (DTU)
Updates.push_back({DominatorTree::Delete, BB, Succ});
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 23bb6f0860c9..5ca0adb4242c 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -473,18 +473,10 @@ public:
/// handle the more complex control flow around the loops.
virtual BasicBlock *createVectorizedLoopSkeleton();
- /// Widen a single instruction within the innermost loop.
- void widenInstruction(Instruction &I, VPValue *Def, VPUser &Operands,
- VPTransformState &State);
-
/// Widen a single call instruction within the innermost loop.
void widenCallInstruction(CallInst &I, VPValue *Def, VPUser &ArgOperands,
VPTransformState &State);
- /// Widen a single select instruction within the innermost loop.
- void widenSelectInstruction(SelectInst &I, VPValue *VPDef, VPUser &Operands,
- bool InvariantCond, VPTransformState &State);
-
/// Fix the vectorized code, taking care of header phi's, live-outs, and more.
void fixVectorizedLoop(VPTransformState &State);
@@ -496,12 +488,6 @@ public:
/// new unrolled loop, where UF is the unroll factor.
using VectorParts = SmallVector<Value *, 2>;
- /// Vectorize a single GetElementPtrInst based on information gathered and
- /// decisions taken during planning.
- void widenGEP(GetElementPtrInst *GEP, VPValue *VPDef, VPUser &Indices,
- unsigned UF, ElementCount VF, bool IsPtrLoopInvariant,
- SmallBitVector &IsIndexLoopInvariant, VPTransformState &State);
-
/// Vectorize a single first-order recurrence or pointer induction PHINode in
/// a block. This method handles the induction variable canonicalization. It
/// supports both VF = 1 for unrolled loops and arbitrary length vectors.
@@ -511,9 +497,9 @@ public:
/// A helper function to scalarize a single Instruction in the innermost loop.
/// Generates a sequence of scalar instances for each lane between \p MinLane
/// and \p MaxLane, times each part between \p MinPart and \p MaxPart,
- /// inclusive. Uses the VPValue operands from \p Operands instead of \p
+ /// inclusive. Uses the VPValue operands from \p RepRecipe instead of \p
/// Instr's operands.
- void scalarizeInstruction(Instruction *Instr, VPValue *Def, VPUser &Operands,
+ void scalarizeInstruction(Instruction *Instr, VPReplicateRecipe *RepRecipe,
const VPIteration &Instance, bool IfPredicateInstr,
VPTransformState &State);
@@ -538,15 +524,6 @@ public:
ArrayRef<VPValue *> StoredValues,
VPValue *BlockInMask = nullptr);
- /// Vectorize Load and Store instructions with the base address given in \p
- /// Addr, optionally masking the vector operations if \p BlockInMask is
- /// non-null. Use \p State to translate given VPValues to IR values in the
- /// vectorized loop.
- void vectorizeMemoryInstruction(Instruction *Instr, VPTransformState &State,
- VPValue *Def, VPValue *Addr,
- VPValue *StoredValue, VPValue *BlockInMask,
- bool ConsecutiveStride, bool Reverse);
-
/// Set the debug location in the builder \p Ptr using the debug location in
/// \p V. If \p Ptr is None then it uses the class member's Builder.
void setDebugLocFromInst(const Value *V,
@@ -566,6 +543,17 @@ public:
/// element.
virtual Value *getBroadcastInstrs(Value *V);
+ /// Add metadata from one instruction to another.
+ ///
+ /// This includes both the original MDs from \p From and additional ones (\see
+ /// addNewMetadata). Use this for *newly created* instructions in the vector
+ /// loop.
+ void addMetadata(Instruction *To, Instruction *From);
+
+ /// Similar to the previous function but it adds the metadata to a
+ /// vector of instructions.
+ void addMetadata(ArrayRef<Value *> To, Instruction *From);
+
protected:
friend class LoopVectorizationPlanner;
@@ -741,16 +729,16 @@ protected:
/// vector loop.
void addNewMetadata(Instruction *To, const Instruction *Orig);
- /// Add metadata from one instruction to another.
- ///
- /// This includes both the original MDs from \p From and additional ones (\see
- /// addNewMetadata). Use this for *newly created* instructions in the vector
- /// loop.
- void addMetadata(Instruction *To, Instruction *From);
-
- /// Similar to the previous function but it adds the metadata to a
- /// vector of instructions.
- void addMetadata(ArrayRef<Value *> To, Instruction *From);
+ /// Collect poison-generating recipes that may generate a poison value that is
+ /// used after vectorization, even when their operands are not poison. Those
+ /// recipes meet the following conditions:
+ /// * Contribute to the address computation of a recipe generating a widen
+ /// memory load/store (VPWidenMemoryInstructionRecipe or
+ /// VPInterleaveRecipe).
+ /// * Such a widen memory load/store has at least one underlying Instruction
+ /// that is in a basic block that needs predication and after vectorization
+ /// the generated instruction won't be predicated.
+ void collectPoisonGeneratingRecipes(VPTransformState &State);
/// Allow subclasses to override and print debug traces before/after vplan
/// execution, when trace information is requested.
@@ -1173,6 +1161,84 @@ void InnerLoopVectorizer::addNewMetadata(Instruction *To,
LVer->annotateInstWithNoAlias(To, Orig);
}
+void InnerLoopVectorizer::collectPoisonGeneratingRecipes(
+ VPTransformState &State) {
+
+ // Collect recipes in the backward slice of `Root` that may generate a poison
+ // value that is used after vectorization.
+ SmallPtrSet<VPRecipeBase *, 16> Visited;
+ auto collectPoisonGeneratingInstrsInBackwardSlice([&](VPRecipeBase *Root) {
+ SmallVector<VPRecipeBase *, 16> Worklist;
+ Worklist.push_back(Root);
+
+ // Traverse the backward slice of Root through its use-def chain.
+ while (!Worklist.empty()) {
+ VPRecipeBase *CurRec = Worklist.back();
+ Worklist.pop_back();
+
+ if (!Visited.insert(CurRec).second)
+ continue;
+
+ // Prune search if we find another recipe generating a widen memory
+ // instruction. Widen memory instructions involved in address computation
+ // will lead to gather/scatter instructions, which don't need to be
+ // handled.
+ if (isa<VPWidenMemoryInstructionRecipe>(CurRec) ||
+ isa<VPInterleaveRecipe>(CurRec))
+ continue;
+
+ // This recipe contributes to the address computation of a widen
+ // load/store. Collect recipe if its underlying instruction has
+ // poison-generating flags.
+ Instruction *Instr = CurRec->getUnderlyingInstr();
+ if (Instr && Instr->hasPoisonGeneratingFlags())
+ State.MayGeneratePoisonRecipes.insert(CurRec);
+
+ // Add new definitions to the worklist.
+ for (VPValue *operand : CurRec->operands())
+ if (VPDef *OpDef = operand->getDef())
+ Worklist.push_back(cast<VPRecipeBase>(OpDef));
+ }
+ });
+
+ // Traverse all the recipes in the VPlan and collect the poison-generating
+ // recipes in the backward slice starting at the address of a
+ // VPWidenMemoryInstructionRecipe or VPInterleaveRecipe.
+ auto Iter = depth_first(
+ VPBlockRecursiveTraversalWrapper<VPBlockBase *>(State.Plan->getEntry()));
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
+ for (VPRecipeBase &Recipe : *VPBB) {
+ if (auto *WidenRec = dyn_cast<VPWidenMemoryInstructionRecipe>(&Recipe)) {
+ Instruction *UnderlyingInstr = WidenRec->getUnderlyingInstr();
+ VPDef *AddrDef = WidenRec->getAddr()->getDef();
+ if (AddrDef && WidenRec->isConsecutive() && UnderlyingInstr &&
+ Legal->blockNeedsPredication(UnderlyingInstr->getParent()))
+ collectPoisonGeneratingInstrsInBackwardSlice(
+ cast<VPRecipeBase>(AddrDef));
+ } else if (auto *InterleaveRec = dyn_cast<VPInterleaveRecipe>(&Recipe)) {
+ VPDef *AddrDef = InterleaveRec->getAddr()->getDef();
+ if (AddrDef) {
+ // Check if any member of the interleave group needs predication.
+ const InterleaveGroup<Instruction> *InterGroup =
+ InterleaveRec->getInterleaveGroup();
+ bool NeedPredication = false;
+ for (int I = 0, NumMembers = InterGroup->getNumMembers();
+ I < NumMembers; ++I) {
+ Instruction *Member = InterGroup->getMember(I);
+ if (Member)
+ NeedPredication |=
+ Legal->blockNeedsPredication(Member->getParent());
+ }
+
+ if (NeedPredication)
+ collectPoisonGeneratingInstrsInBackwardSlice(
+ cast<VPRecipeBase>(AddrDef));
+ }
+ }
+ }
+ }
+}
+
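A standalone C++ sketch (not part of the patch, names illustrative only) of the worklist traversal implemented above: starting from the address definition of a widened memory access, walk the use-def chain backwards, prune at other widened memory accesses (those become gathers/scatters), and record every visited definition whose underlying instruction carries poison-generating flags.

#include <unordered_set>
#include <vector>

struct Node {                      // stand-in for a VPRecipeBase
  std::vector<Node *> Operands;    // definitions of this node's operands
  bool IsWidenMemory = false;      // would become a gather/scatter; prune here
  bool HasPoisonFlags = false;     // nuw/nsw/exact/inbounds on the instruction
};

std::unordered_set<Node *> collectPoisonGenerators(Node *Root) {
  std::unordered_set<Node *> Visited, Collected;
  std::vector<Node *> Worklist{Root};
  while (!Worklist.empty()) {
    Node *Cur = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(Cur).second)
      continue;                            // already processed
    if (Cur->IsWidenMemory)
      continue;                            // prune the backward slice here
    if (Cur->HasPoisonFlags)
      Collected.insert(Cur);               // its flags must be dropped later
    for (Node *Op : Cur->Operands)
      Worklist.push_back(Op);              // keep walking the use-def chain
  }
  return Collected;
}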
void InnerLoopVectorizer::addMetadata(Instruction *To,
Instruction *From) {
propagateMetadata(To, From);
@@ -1541,7 +1607,16 @@ public:
// Returns true if \p I is an instruction that will be predicated either
// through scalar predication or masked load/store or masked gather/scatter.
// Superset of instructions that return true for isScalarWithPredication.
- bool isPredicatedInst(Instruction *I) {
+ bool isPredicatedInst(Instruction *I, bool IsKnownUniform = false) {
+    // When we know the load is uniform and the original scalar loop was not
+    // predicated, we don't need to mark it as a predicated instruction. Any
+    // vectorized blocks created when tail-folding are artifacts we have
+    // introduced ourselves, and we know there is always at least one active
+    // lane. That's why we call Legal->blockNeedsPredication here: it does not
+    // take tail-folding into account.
+ if (IsKnownUniform && isa<LoadInst>(I) &&
+ !Legal->blockNeedsPredication(I->getParent()))
+ return false;
if (!blockNeedsPredicationForAnyReason(I->getParent()))
return false;
// Loads and stores that need some form of masked operation are predicated
@@ -1816,9 +1891,11 @@ private:
/// Collect the instructions that are scalar after vectorization. An
/// instruction is scalar if it is known to be uniform or will be scalarized
- /// during vectorization. Non-uniform scalarized instructions will be
- /// represented by VF values in the vectorized loop, each corresponding to an
- /// iteration of the original scalar loop.
+ /// during vectorization. collectLoopScalars should only add non-uniform nodes
+ /// to the list if they are used by a load/store instruction that is marked as
+ /// CM_Scalarize. Non-uniform scalarized instructions will be represented by
+ /// VF values in the vectorized loop, each corresponding to an iteration of
+ /// the original scalar loop.
void collectLoopScalars(ElementCount VF);
/// Keeps cost model vectorization decision and cost for instructions.
@@ -2918,132 +2995,8 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
}
}
-void InnerLoopVectorizer::vectorizeMemoryInstruction(
- Instruction *Instr, VPTransformState &State, VPValue *Def, VPValue *Addr,
- VPValue *StoredValue, VPValue *BlockInMask, bool ConsecutiveStride,
- bool Reverse) {
- // Attempt to issue a wide load.
- LoadInst *LI = dyn_cast<LoadInst>(Instr);
- StoreInst *SI = dyn_cast<StoreInst>(Instr);
-
- assert((LI || SI) && "Invalid Load/Store instruction");
- assert((!SI || StoredValue) && "No stored value provided for widened store");
- assert((!LI || !StoredValue) && "Stored value provided for widened load");
-
- Type *ScalarDataTy = getLoadStoreType(Instr);
-
- auto *DataTy = VectorType::get(ScalarDataTy, VF);
- const Align Alignment = getLoadStoreAlignment(Instr);
- bool CreateGatherScatter = !ConsecutiveStride;
-
- VectorParts BlockInMaskParts(UF);
- bool isMaskRequired = BlockInMask;
- if (isMaskRequired)
- for (unsigned Part = 0; Part < UF; ++Part)
- BlockInMaskParts[Part] = State.get(BlockInMask, Part);
-
- const auto CreateVecPtr = [&](unsigned Part, Value *Ptr) -> Value * {
- // Calculate the pointer for the specific unroll-part.
- GetElementPtrInst *PartPtr = nullptr;
-
- bool InBounds = false;
- if (auto *gep = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))
- InBounds = gep->isInBounds();
- if (Reverse) {
- // If the address is consecutive but reversed, then the
- // wide store needs to start at the last vector element.
- // RunTimeVF = VScale * VF.getKnownMinValue()
- // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
- Value *RunTimeVF = getRuntimeVF(Builder, Builder.getInt32Ty(), VF);
- // NumElt = -Part * RunTimeVF
- Value *NumElt = Builder.CreateMul(Builder.getInt32(-Part), RunTimeVF);
- // LastLane = 1 - RunTimeVF
- Value *LastLane = Builder.CreateSub(Builder.getInt32(1), RunTimeVF);
- PartPtr =
- cast<GetElementPtrInst>(Builder.CreateGEP(ScalarDataTy, Ptr, NumElt));
- PartPtr->setIsInBounds(InBounds);
- PartPtr = cast<GetElementPtrInst>(
- Builder.CreateGEP(ScalarDataTy, PartPtr, LastLane));
- PartPtr->setIsInBounds(InBounds);
- if (isMaskRequired) // Reverse of a null all-one mask is a null mask.
- BlockInMaskParts[Part] = reverseVector(BlockInMaskParts[Part]);
- } else {
- Value *Increment =
- createStepForVF(Builder, Builder.getInt32Ty(), VF, Part);
- PartPtr = cast<GetElementPtrInst>(
- Builder.CreateGEP(ScalarDataTy, Ptr, Increment));
- PartPtr->setIsInBounds(InBounds);
- }
-
- unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace();
- return Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
- };
-
- // Handle Stores:
- if (SI) {
- setDebugLocFromInst(SI);
-
- for (unsigned Part = 0; Part < UF; ++Part) {
- Instruction *NewSI = nullptr;
- Value *StoredVal = State.get(StoredValue, Part);
- if (CreateGatherScatter) {
- Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
- Value *VectorGep = State.get(Addr, Part);
- NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
- MaskPart);
- } else {
- if (Reverse) {
- // If we store to reverse consecutive memory locations, then we need
- // to reverse the order of elements in the stored value.
- StoredVal = reverseVector(StoredVal);
- // We don't want to update the value in the map as it might be used in
- // another expression. So don't call resetVectorValue(StoredVal).
- }
- auto *VecPtr = CreateVecPtr(Part, State.get(Addr, VPIteration(0, 0)));
- if (isMaskRequired)
- NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
- BlockInMaskParts[Part]);
- else
- NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
- }
- addMetadata(NewSI, SI);
- }
- return;
- }
-
- // Handle loads.
- assert(LI && "Must have a load instruction");
- setDebugLocFromInst(LI);
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *NewLI;
- if (CreateGatherScatter) {
- Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
- Value *VectorGep = State.get(Addr, Part);
- NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, MaskPart,
- nullptr, "wide.masked.gather");
- addMetadata(NewLI, LI);
- } else {
- auto *VecPtr = CreateVecPtr(Part, State.get(Addr, VPIteration(0, 0)));
- if (isMaskRequired)
- NewLI = Builder.CreateMaskedLoad(
- DataTy, VecPtr, Alignment, BlockInMaskParts[Part],
- PoisonValue::get(DataTy), "wide.masked.load");
- else
- NewLI =
- Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment, "wide.load");
-
- // Add metadata to the load, but setVectorValue to the reverse shuffle.
- addMetadata(NewLI, LI);
- if (Reverse)
- NewLI = reverseVector(NewLI);
- }
-
- State.set(Def, NewLI, Part);
- }
-}
-
-void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, VPValue *Def,
- VPUser &User,
+void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
+ VPReplicateRecipe *RepRecipe,
const VPIteration &Instance,
bool IfPredicateInstr,
VPTransformState &State) {
@@ -3064,17 +3017,26 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, VPValue *Def,
if (!IsVoidRetTy)
Cloned->setName(Instr->getName() + ".cloned");
+ // If the scalarized instruction contributes to the address computation of a
+ // widen masked load/store which was in a basic block that needed predication
+ // and is not predicated after vectorization, we can't propagate
+ // poison-generating flags (nuw/nsw, exact, inbounds, etc.). The scalarized
+ // instruction could feed a poison value to the base address of the widen
+ // load/store.
+ if (State.MayGeneratePoisonRecipes.count(RepRecipe) > 0)
+ Cloned->dropPoisonGeneratingFlags();
+
State.Builder.SetInsertPoint(Builder.GetInsertBlock(),
Builder.GetInsertPoint());
// Replace the operands of the cloned instructions with their scalar
// equivalents in the new loop.
- for (unsigned op = 0, e = User.getNumOperands(); op != e; ++op) {
+ for (unsigned op = 0, e = RepRecipe->getNumOperands(); op != e; ++op) {
auto *Operand = dyn_cast<Instruction>(Instr->getOperand(op));
auto InputInstance = Instance;
if (!Operand || !OrigLoop->contains(Operand) ||
(Cost->isUniformAfterVectorization(Operand, State.VF)))
InputInstance.Lane = VPLane::getFirstLane();
- auto *NewOp = State.get(User.getOperand(op), InputInstance);
+ auto *NewOp = State.get(RepRecipe->getOperand(op), InputInstance);
Cloned->setOperand(op, NewOp);
}
addNewMetadata(Cloned, Instr);
@@ -3082,7 +3044,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, VPValue *Def,
// Place the cloned scalar in the new loop.
Builder.Insert(Cloned);
- State.set(Def, Cloned, Instance);
+ State.set(RepRecipe, Cloned, Instance);
// If we just cloned a new assumption, add it the assumption cache.
if (auto *II = dyn_cast<AssumeInst>(Cloned))
@@ -4615,77 +4577,6 @@ bool InnerLoopVectorizer::useOrderedReductions(RecurrenceDescriptor &RdxDesc) {
return Cost->useOrderedReductions(RdxDesc);
}
-void InnerLoopVectorizer::widenGEP(GetElementPtrInst *GEP, VPValue *VPDef,
- VPUser &Operands, unsigned UF,
- ElementCount VF, bool IsPtrLoopInvariant,
- SmallBitVector &IsIndexLoopInvariant,
- VPTransformState &State) {
- // Construct a vector GEP by widening the operands of the scalar GEP as
- // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
- // results in a vector of pointers when at least one operand of the GEP
- // is vector-typed. Thus, to keep the representation compact, we only use
- // vector-typed operands for loop-varying values.
-
- if (VF.isVector() && IsPtrLoopInvariant && IsIndexLoopInvariant.all()) {
- // If we are vectorizing, but the GEP has only loop-invariant operands,
- // the GEP we build (by only using vector-typed operands for
- // loop-varying values) would be a scalar pointer. Thus, to ensure we
- // produce a vector of pointers, we need to either arbitrarily pick an
- // operand to broadcast, or broadcast a clone of the original GEP.
- // Here, we broadcast a clone of the original.
- //
- // TODO: If at some point we decide to scalarize instructions having
- // loop-invariant operands, this special case will no longer be
- // required. We would add the scalarization decision to
- // collectLoopScalars() and teach getVectorValue() to broadcast
- // the lane-zero scalar value.
- auto *Clone = Builder.Insert(GEP->clone());
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *EntryPart = Builder.CreateVectorSplat(VF, Clone);
- State.set(VPDef, EntryPart, Part);
- addMetadata(EntryPart, GEP);
- }
- } else {
- // If the GEP has at least one loop-varying operand, we are sure to
- // produce a vector of pointers. But if we are only unrolling, we want
- // to produce a scalar GEP for each unroll part. Thus, the GEP we
- // produce with the code below will be scalar (if VF == 1) or vector
- // (otherwise). Note that for the unroll-only case, we still maintain
- // values in the vector mapping with initVector, as we do for other
- // instructions.
- for (unsigned Part = 0; Part < UF; ++Part) {
- // The pointer operand of the new GEP. If it's loop-invariant, we
- // won't broadcast it.
- auto *Ptr = IsPtrLoopInvariant
- ? State.get(Operands.getOperand(0), VPIteration(0, 0))
- : State.get(Operands.getOperand(0), Part);
-
- // Collect all the indices for the new GEP. If any index is
- // loop-invariant, we won't broadcast it.
- SmallVector<Value *, 4> Indices;
- for (unsigned I = 1, E = Operands.getNumOperands(); I < E; I++) {
- VPValue *Operand = Operands.getOperand(I);
- if (IsIndexLoopInvariant[I - 1])
- Indices.push_back(State.get(Operand, VPIteration(0, 0)));
- else
- Indices.push_back(State.get(Operand, Part));
- }
-
- // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
- // but it should be a vector, otherwise.
- auto *NewGEP =
- GEP->isInBounds()
- ? Builder.CreateInBoundsGEP(GEP->getSourceElementType(), Ptr,
- Indices)
- : Builder.CreateGEP(GEP->getSourceElementType(), Ptr, Indices);
- assert((VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
- "NewGEP is not a pointer vector");
- State.set(VPDef, NewGEP, Part);
- addMetadata(NewGEP, GEP);
- }
- }
-}
-
void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
VPWidenPHIRecipe *PhiR,
VPTransformState &State) {
@@ -4745,38 +4636,14 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
// iteration. If the instruction is uniform, we only need to generate the
// first lane. Otherwise, we generate all VF values.
bool IsUniform = Cost->isUniformAfterVectorization(P, State.VF);
- unsigned Lanes = IsUniform ? 1 : State.VF.getKnownMinValue();
-
- bool NeedsVectorIndex = !IsUniform && VF.isScalable();
- Value *UnitStepVec = nullptr, *PtrIndSplat = nullptr;
- if (NeedsVectorIndex) {
- Type *VecIVTy = VectorType::get(PtrInd->getType(), VF);
- UnitStepVec = Builder.CreateStepVector(VecIVTy);
- PtrIndSplat = Builder.CreateVectorSplat(VF, PtrInd);
- }
+ assert((IsUniform || !State.VF.isScalable()) &&
+ "Cannot scalarize a scalable VF");
+ unsigned Lanes = IsUniform ? 1 : State.VF.getFixedValue();
for (unsigned Part = 0; Part < UF; ++Part) {
Value *PartStart =
createStepForVF(Builder, PtrInd->getType(), VF, Part);
- if (NeedsVectorIndex) {
- // Here we cache the whole vector, which means we can support the
- // extraction of any lane. However, in some cases the extractelement
- // instruction that is generated for scalar uses of this vector (e.g.
- // a load instruction) is not folded away. Therefore we still
- // calculate values for the first n lanes to avoid redundant moves
- // (when extracting the 0th element) and to produce scalar code (i.e.
- // additional add/gep instructions instead of expensive extractelement
- // instructions) when extracting higher-order elements.
- Value *PartStartSplat = Builder.CreateVectorSplat(VF, PartStart);
- Value *Indices = Builder.CreateAdd(PartStartSplat, UnitStepVec);
- Value *GlobalIndices = Builder.CreateAdd(PtrIndSplat, Indices);
- Value *SclrGep =
- emitTransformedIndex(Builder, GlobalIndices, PSE.getSE(), DL, II);
- SclrGep->setName("next.gep");
- State.set(PhiR, SclrGep, Part);
- }
-
for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
Value *Idx = Builder.CreateAdd(
PartStart, ConstantInt::get(PtrInd->getType(), Lane));
@@ -4858,114 +4725,6 @@ static bool mayDivideByZero(Instruction &I) {
return !CInt || CInt->isZero();
}
-void InnerLoopVectorizer::widenInstruction(Instruction &I, VPValue *Def,
- VPUser &User,
- VPTransformState &State) {
- switch (I.getOpcode()) {
- case Instruction::Call:
- case Instruction::Br:
- case Instruction::PHI:
- case Instruction::GetElementPtr:
- case Instruction::Select:
- llvm_unreachable("This instruction is handled by a different recipe.");
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::SRem:
- case Instruction::URem:
- case Instruction::Add:
- case Instruction::FAdd:
- case Instruction::Sub:
- case Instruction::FSub:
- case Instruction::FNeg:
- case Instruction::Mul:
- case Instruction::FMul:
- case Instruction::FDiv:
- case Instruction::FRem:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor: {
- // Just widen unops and binops.
- setDebugLocFromInst(&I);
-
- for (unsigned Part = 0; Part < UF; ++Part) {
- SmallVector<Value *, 2> Ops;
- for (VPValue *VPOp : User.operands())
- Ops.push_back(State.get(VPOp, Part));
-
- Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops);
-
- if (auto *VecOp = dyn_cast<Instruction>(V))
- VecOp->copyIRFlags(&I);
-
- // Use this vector value for all users of the original instruction.
- State.set(Def, V, Part);
- addMetadata(V, &I);
- }
-
- break;
- }
- case Instruction::ICmp:
- case Instruction::FCmp: {
- // Widen compares. Generate vector compares.
- bool FCmp = (I.getOpcode() == Instruction::FCmp);
- auto *Cmp = cast<CmpInst>(&I);
- setDebugLocFromInst(Cmp);
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *A = State.get(User.getOperand(0), Part);
- Value *B = State.get(User.getOperand(1), Part);
- Value *C = nullptr;
- if (FCmp) {
- // Propagate fast math flags.
- IRBuilder<>::FastMathFlagGuard FMFG(Builder);
- Builder.setFastMathFlags(Cmp->getFastMathFlags());
- C = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
- } else {
- C = Builder.CreateICmp(Cmp->getPredicate(), A, B);
- }
- State.set(Def, C, Part);
- addMetadata(C, &I);
- }
-
- break;
- }
-
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::FPExt:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::SIToFP:
- case Instruction::UIToFP:
- case Instruction::Trunc:
- case Instruction::FPTrunc:
- case Instruction::BitCast: {
- auto *CI = cast<CastInst>(&I);
- setDebugLocFromInst(CI);
-
- /// Vectorize casts.
- Type *DestTy =
- (VF.isScalar()) ? CI->getType() : VectorType::get(CI->getType(), VF);
-
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *A = State.get(User.getOperand(0), Part);
- Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy);
- State.set(Def, Cast, Part);
- addMetadata(Cast, &I);
- }
- break;
- }
- default:
- // This instruction is not vectorized by simple widening.
- LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I);
- llvm_unreachable("Unhandled instruction!");
- } // end of switch.
-}
-
void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def,
VPUser &ArgOperands,
VPTransformState &State) {
@@ -5039,31 +4798,6 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def,
}
}
-void InnerLoopVectorizer::widenSelectInstruction(SelectInst &I, VPValue *VPDef,
- VPUser &Operands,
- bool InvariantCond,
- VPTransformState &State) {
- setDebugLocFromInst(&I);
-
- // The condition can be loop invariant but still defined inside the
- // loop. This means that we can't just use the original 'cond' value.
- // We have to take the 'vectorized' value and pick the first lane.
- // Instcombine will make this a no-op.
- auto *InvarCond = InvariantCond
- ? State.get(Operands.getOperand(0), VPIteration(0, 0))
- : nullptr;
-
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *Cond =
- InvarCond ? InvarCond : State.get(Operands.getOperand(0), Part);
- Value *Op0 = State.get(Operands.getOperand(1), Part);
- Value *Op1 = State.get(Operands.getOperand(2), Part);
- Value *Sel = Builder.CreateSelect(Cond, Op0, Op1);
- State.set(VPDef, Sel, Part);
- addMetadata(Sel, &I);
- }
-}
-
void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
// We should not collect Scalars more than once per VF. Right now, this
// function is called from collectUniformsAndScalars(), which already does
@@ -5103,38 +4837,11 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
!TheLoop->isLoopInvariant(V);
};
- auto isScalarPtrInduction = [&](Instruction *MemAccess, Value *Ptr) {
- if (!isa<PHINode>(Ptr) ||
- !Legal->getInductionVars().count(cast<PHINode>(Ptr)))
- return false;
- auto &Induction = Legal->getInductionVars()[cast<PHINode>(Ptr)];
- if (Induction.getKind() != InductionDescriptor::IK_PtrInduction)
- return false;
- return isScalarUse(MemAccess, Ptr);
- };
-
- // A helper that evaluates a memory access's use of a pointer. If the
- // pointer is actually the pointer induction of a loop, it is being
- // inserted into Worklist. If the use will be a scalar use, and the
- // pointer is only used by memory accesses, we place the pointer in
- // ScalarPtrs. Otherwise, the pointer is placed in PossibleNonScalarPtrs.
+ // A helper that evaluates a memory access's use of a pointer. If the use will
+ // be a scalar use and the pointer is only used by memory accesses, we place
+ // the pointer in ScalarPtrs. Otherwise, the pointer is placed in
+ // PossibleNonScalarPtrs.
auto evaluatePtrUse = [&](Instruction *MemAccess, Value *Ptr) {
- if (isScalarPtrInduction(MemAccess, Ptr)) {
- Worklist.insert(cast<Instruction>(Ptr));
- LLVM_DEBUG(dbgs() << "LV: Found new scalar instruction: " << *Ptr
- << "\n");
-
- Instruction *Update = cast<Instruction>(
- cast<PHINode>(Ptr)->getIncomingValueForBlock(Latch));
-
- // If there is more than one user of Update (Ptr), we shouldn't assume it
- // will be scalar after vectorisation as other users of the instruction
- // may require widening. Otherwise, add it to ScalarPtrs.
- if (Update->hasOneUse() && cast<Value>(*Update->user_begin()) == Ptr) {
- ScalarPtrs.insert(Update);
- return;
- }
- }
// We only care about bitcast and getelementptr instructions contained in
// the loop.
if (!isLoopVaryingBitCastOrGEP(Ptr))
@@ -5226,11 +4933,22 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
if (Ind == Legal->getPrimaryInduction() && foldTailByMasking())
continue;
+ // Returns true if \p Indvar is a pointer induction that is used directly by
+ // load/store instruction \p I.
+ auto IsDirectLoadStoreFromPtrIndvar = [&](Instruction *Indvar,
+ Instruction *I) {
+ return Induction.second.getKind() ==
+ InductionDescriptor::IK_PtrInduction &&
+ (isa<LoadInst>(I) || isa<StoreInst>(I)) &&
+ Indvar == getLoadStorePointerOperand(I) && isScalarUse(I, Indvar);
+ };
+
// Determine if all users of the induction variable are scalar after
// vectorization.
auto ScalarInd = llvm::all_of(Ind->users(), [&](User *U) -> bool {
auto *I = cast<Instruction>(U);
- return I == IndUpdate || !TheLoop->contains(I) || Worklist.count(I);
+ return I == IndUpdate || !TheLoop->contains(I) || Worklist.count(I) ||
+ IsDirectLoadStoreFromPtrIndvar(Ind, I);
});
if (!ScalarInd)
continue;
@@ -5240,7 +4958,8 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
auto ScalarIndUpdate =
llvm::all_of(IndUpdate->users(), [&](User *U) -> bool {
auto *I = cast<Instruction>(U);
- return I == Ind || !TheLoop->contains(I) || Worklist.count(I);
+ return I == Ind || !TheLoop->contains(I) || Worklist.count(I) ||
+ IsDirectLoadStoreFromPtrIndvar(IndUpdate, I);
});
if (!ScalarIndUpdate)
continue;
@@ -7079,6 +6798,8 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
unsigned AS = getLoadStoreAddressSpace(I);
Value *Ptr = getLoadStorePointerOperand(I);
Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
+ // NOTE: PtrTy is a vector to signal `TTI::getAddressComputationCost`
+ // that it is being called from this specific place.
// Figure out whether the access is strided and get the stride value
// if it's known in compile time
@@ -7286,6 +7007,12 @@ Optional<InstructionCost> LoopVectorizationCostModel::getReductionPatternCost(
InstructionCost BaseCost = TTI.getArithmeticReductionCost(
RdxDesc.getOpcode(), VectorTy, RdxDesc.getFastMathFlags(), CostKind);
+ // For a call to the llvm.fmuladd intrinsic we need to add the cost of a
+ // normal fmul instruction to the cost of the fadd reduction.
+ if (RdxDesc.getRecurrenceKind() == RecurKind::FMulAdd)
+ BaseCost +=
+ TTI.getArithmeticInstrCost(Instruction::FMul, VectorTy, CostKind);
+
// If we're using ordered reductions then we can just return the base cost
// here, since getArithmeticReductionCost calculates the full ordered
// reduction cost when FP reassociation is not allowed.
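The extra FMul cost added above reflects how an in-loop llvm.fmuladd reduction is vectorized: the two multiplicand operands are multiplied element-wise and the products are then folded into the accumulator by the fadd reduction. A plain C++ sketch of that shape (VF = 4, names illustrative; the adds are reassociated unless an ordered reduction is required):

#include <array>
#include <cstddef>

float fmuladdReduction(const std::array<float, 8> &A,
                       const std::array<float, 8> &B) {
  float Acc = 0.0f;
  // Scalar loop: Acc = fmuladd(A[I], B[I], Acc) on every iteration.
  for (std::size_t I = 0; I < A.size(); I += 4) {
    std::array<float, 4> Prod;        // the element-wise FMul being costed
    for (std::size_t L = 0; L < 4; ++L)
      Prod[L] = A[I + L] * B[I + L];
    for (float P : Prod)              // the fadd reduction of the products
      Acc += P;
  }
  return Acc;
}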
@@ -7962,6 +7689,9 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
return TTI.getCastInstrCost(Opcode, VectorTy, SrcVecTy, CCH, CostKind, I);
}
case Instruction::Call: {
+ if (RecurrenceDescriptor::isFMulAddIntrinsic(I))
+ if (auto RedCost = getReductionPatternCost(I, VF, VectorTy, CostKind))
+ return *RedCost;
bool NeedToScalarize;
CallInst *CI = cast<CallInst>(I);
InstructionCost CallCost = getVectorCallCost(CI, VF, NeedToScalarize);
@@ -8260,6 +7990,7 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF,
State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();
State.TripCount = ILV.getOrCreateTripCount(nullptr);
State.CanonicalIV = ILV.Induction;
+ ILV.collectPoisonGeneratingRecipes(State);
ILV.printDebugTracesAtStart();
@@ -8468,7 +8199,8 @@ void EpilogueVectorizerMainLoop::printDebugTracesAtStart() {
void EpilogueVectorizerMainLoop::printDebugTracesAtEnd() {
DEBUG_WITH_TYPE(VerboseDebug, {
- dbgs() << "intermediate fn:\n" << *Induction->getFunction() << "\n";
+ dbgs() << "intermediate fn:\n"
+ << *OrigLoop->getHeader()->getParent() << "\n";
});
}
@@ -8666,7 +8398,7 @@ void EpilogueVectorizerEpilogueLoop::printDebugTracesAtStart() {
void EpilogueVectorizerEpilogueLoop::printDebugTracesAtEnd() {
DEBUG_WITH_TYPE(VerboseDebug, {
- dbgs() << "final fn:\n" << *Induction->getFunction() << "\n";
+ dbgs() << "final fn:\n" << *OrigLoop->getHeader()->getParent() << "\n";
});
}
@@ -9052,7 +8784,8 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
Range);
bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
- [&](ElementCount VF) { return CM.isPredicatedInst(I); }, Range);
+ [&](ElementCount VF) { return CM.isPredicatedInst(I, IsUniform); },
+ Range);
// Even if the instruction is not marked as uniform, there are certain
// intrinsic calls that can be effectively treated as such, so we check for
@@ -9354,7 +9087,9 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
if (VPBB)
VPBlockUtils::insertBlockAfter(FirstVPBBForBB, VPBB);
else {
- Plan->setEntry(FirstVPBBForBB);
+ auto *TopRegion = new VPRegionBlock("vector loop");
+ TopRegion->setEntry(FirstVPBBForBB);
+ Plan->setEntry(TopRegion);
HeaderVPBB = FirstVPBBForBB;
}
VPBB = FirstVPBBForBB;
@@ -9426,9 +9161,11 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
}
}
- assert(isa<VPBasicBlock>(Plan->getEntry()) &&
+ assert(isa<VPRegionBlock>(Plan->getEntry()) &&
!Plan->getEntry()->getEntryBasicBlock()->empty() &&
- "entry block must be set to a non-empty VPBasicBlock");
+ "entry block must be set to a VPRegionBlock having a non-empty entry "
+ "VPBasicBlock");
+ cast<VPRegionBlock>(Plan->getEntry())->setExit(VPBB);
RecipeBuilder.fixHeaderPhis();
// ---------------------------------------------------------------------------
@@ -9653,12 +9390,17 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
unsigned FirstOpId;
assert(!RecurrenceDescriptor::isSelectCmpRecurrenceKind(Kind) &&
"Only min/max recurrences allowed for inloop reductions");
+ // Recognize a call to the llvm.fmuladd intrinsic.
+ bool IsFMulAdd = (Kind == RecurKind::FMulAdd);
+ assert((!IsFMulAdd || RecurrenceDescriptor::isFMulAddIntrinsic(R)) &&
+ "Expected instruction to be a call to the llvm.fmuladd intrinsic");
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
assert(isa<VPWidenSelectRecipe>(WidenRecipe) &&
"Expected to replace a VPWidenSelectSC");
FirstOpId = 1;
} else {
- assert((MinVF.isScalar() || isa<VPWidenRecipe>(WidenRecipe)) &&
+ assert((MinVF.isScalar() || isa<VPWidenRecipe>(WidenRecipe) ||
+ (IsFMulAdd && isa<VPWidenCallRecipe>(WidenRecipe))) &&
"Expected to replace a VPWidenSC");
FirstOpId = 0;
}
@@ -9669,8 +9411,20 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
auto *CondOp = CM.foldTailByMasking()
? RecipeBuilder.createBlockInMask(R->getParent(), Plan)
: nullptr;
- VPReductionRecipe *RedRecipe = new VPReductionRecipe(
- &RdxDesc, R, ChainOp, VecOp, CondOp, TTI);
+
+ if (IsFMulAdd) {
+ // If the instruction is a call to the llvm.fmuladd intrinsic then we
+ // need to create an fmul recipe to use as the vector operand for the
+ // fadd reduction.
+ VPInstruction *FMulRecipe = new VPInstruction(
+ Instruction::FMul, {VecOp, Plan->getVPValue(R->getOperand(1))});
+ FMulRecipe->setFastMathFlags(R->getFastMathFlags());
+ WidenRecipe->getParent()->insert(FMulRecipe,
+ WidenRecipe->getIterator());
+ VecOp = FMulRecipe;
+ }
+ VPReductionRecipe *RedRecipe =
+ new VPReductionRecipe(&RdxDesc, R, ChainOp, VecOp, CondOp, TTI);
WidenRecipe->getVPSingleValue()->replaceAllUsesWith(RedRecipe);
Plan->removeVPValueFor(R);
Plan->addVPValue(R, RedRecipe);
@@ -9744,18 +9498,218 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {
}
void VPWidenSelectRecipe::execute(VPTransformState &State) {
- State.ILV->widenSelectInstruction(*cast<SelectInst>(getUnderlyingInstr()),
- this, *this, InvariantCond, State);
+ auto &I = *cast<SelectInst>(getUnderlyingInstr());
+ State.ILV->setDebugLocFromInst(&I);
+
+ // The condition can be loop invariant but still defined inside the
+ // loop. This means that we can't just use the original 'cond' value.
+ // We have to take the 'vectorized' value and pick the first lane.
+ // Instcombine will make this a no-op.
+ auto *InvarCond =
+ InvariantCond ? State.get(getOperand(0), VPIteration(0, 0)) : nullptr;
+
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Value *Cond = InvarCond ? InvarCond : State.get(getOperand(0), Part);
+ Value *Op0 = State.get(getOperand(1), Part);
+ Value *Op1 = State.get(getOperand(2), Part);
+ Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
+ State.set(this, Sel, Part);
+ State.ILV->addMetadata(Sel, &I);
+ }
}
void VPWidenRecipe::execute(VPTransformState &State) {
- State.ILV->widenInstruction(*getUnderlyingInstr(), this, *this, State);
+ auto &I = *cast<Instruction>(getUnderlyingValue());
+ auto &Builder = State.Builder;
+ switch (I.getOpcode()) {
+ case Instruction::Call:
+ case Instruction::Br:
+ case Instruction::PHI:
+ case Instruction::GetElementPtr:
+ case Instruction::Select:
+ llvm_unreachable("This instruction is handled by a different recipe.");
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::FNeg:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ // Just widen unops and binops.
+ State.ILV->setDebugLocFromInst(&I);
+
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ SmallVector<Value *, 2> Ops;
+ for (VPValue *VPOp : operands())
+ Ops.push_back(State.get(VPOp, Part));
+
+ Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops);
+
+ if (auto *VecOp = dyn_cast<Instruction>(V)) {
+ VecOp->copyIRFlags(&I);
+
+ // If the instruction is vectorized and was in a basic block that needed
+ // predication, we can't propagate poison-generating flags (nuw/nsw,
+ // exact, etc.). The control flow has been linearized and the
+      // instruction is no longer guarded by the predicate, so the flag
+      // properties may no longer hold.
+ if (State.MayGeneratePoisonRecipes.count(this) > 0)
+ VecOp->dropPoisonGeneratingFlags();
+ }
+
+ // Use this vector value for all users of the original instruction.
+ State.set(this, V, Part);
+ State.ILV->addMetadata(V, &I);
+ }
+
+ break;
+ }
+ case Instruction::ICmp:
+ case Instruction::FCmp: {
+ // Widen compares. Generate vector compares.
+ bool FCmp = (I.getOpcode() == Instruction::FCmp);
+ auto *Cmp = cast<CmpInst>(&I);
+ State.ILV->setDebugLocFromInst(Cmp);
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Value *A = State.get(getOperand(0), Part);
+ Value *B = State.get(getOperand(1), Part);
+ Value *C = nullptr;
+ if (FCmp) {
+ // Propagate fast math flags.
+ IRBuilder<>::FastMathFlagGuard FMFG(Builder);
+ Builder.setFastMathFlags(Cmp->getFastMathFlags());
+ C = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
+ } else {
+ C = Builder.CreateICmp(Cmp->getPredicate(), A, B);
+ }
+ State.set(this, C, Part);
+ State.ILV->addMetadata(C, &I);
+ }
+
+ break;
+ }
+
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast: {
+ auto *CI = cast<CastInst>(&I);
+ State.ILV->setDebugLocFromInst(CI);
+
+ /// Vectorize casts.
+ Type *DestTy = (State.VF.isScalar())
+ ? CI->getType()
+ : VectorType::get(CI->getType(), State.VF);
+
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Value *A = State.get(getOperand(0), Part);
+ Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy);
+ State.set(this, Cast, Part);
+ State.ILV->addMetadata(Cast, &I);
+ }
+ break;
+ }
+ default:
+ // This instruction is not vectorized by simple widening.
+ LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I);
+ llvm_unreachable("Unhandled instruction!");
+ } // end of switch.
}
void VPWidenGEPRecipe::execute(VPTransformState &State) {
- State.ILV->widenGEP(cast<GetElementPtrInst>(getUnderlyingInstr()), this,
- *this, State.UF, State.VF, IsPtrLoopInvariant,
- IsIndexLoopInvariant, State);
+ auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
+ // Construct a vector GEP by widening the operands of the scalar GEP as
+ // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
+ // results in a vector of pointers when at least one operand of the GEP
+ // is vector-typed. Thus, to keep the representation compact, we only use
+ // vector-typed operands for loop-varying values.
+
+ if (State.VF.isVector() && IsPtrLoopInvariant && IsIndexLoopInvariant.all()) {
+ // If we are vectorizing, but the GEP has only loop-invariant operands,
+ // the GEP we build (by only using vector-typed operands for
+ // loop-varying values) would be a scalar pointer. Thus, to ensure we
+ // produce a vector of pointers, we need to either arbitrarily pick an
+ // operand to broadcast, or broadcast a clone of the original GEP.
+ // Here, we broadcast a clone of the original.
+ //
+ // TODO: If at some point we decide to scalarize instructions having
+ // loop-invariant operands, this special case will no longer be
+ // required. We would add the scalarization decision to
+ // collectLoopScalars() and teach getVectorValue() to broadcast
+ // the lane-zero scalar value.
+ auto *Clone = State.Builder.Insert(GEP->clone());
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, Clone);
+ State.set(this, EntryPart, Part);
+ State.ILV->addMetadata(EntryPart, GEP);
+ }
+ } else {
+ // If the GEP has at least one loop-varying operand, we are sure to
+ // produce a vector of pointers. But if we are only unrolling, we want
+ // to produce a scalar GEP for each unroll part. Thus, the GEP we
+ // produce with the code below will be scalar (if VF == 1) or vector
+ // (otherwise). Note that for the unroll-only case, we still maintain
+ // values in the vector mapping with initVector, as we do for other
+ // instructions.
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ // The pointer operand of the new GEP. If it's loop-invariant, we
+ // won't broadcast it.
+ auto *Ptr = IsPtrLoopInvariant
+ ? State.get(getOperand(0), VPIteration(0, 0))
+ : State.get(getOperand(0), Part);
+
+ // Collect all the indices for the new GEP. If any index is
+ // loop-invariant, we won't broadcast it.
+ SmallVector<Value *, 4> Indices;
+ for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
+ VPValue *Operand = getOperand(I);
+ if (IsIndexLoopInvariant[I - 1])
+ Indices.push_back(State.get(Operand, VPIteration(0, 0)));
+ else
+ Indices.push_back(State.get(Operand, Part));
+ }
+
+ // If the GEP instruction is vectorized and was in a basic block that
+ // needed predication, we can't propagate the poison-generating 'inbounds'
+ // flag. The control flow has been linearized and the GEP is no longer
+      // guarded by the predicate, so the 'inbounds' property may no longer
+      // hold.
+ bool IsInBounds =
+ GEP->isInBounds() && State.MayGeneratePoisonRecipes.count(this) == 0;
+
+ // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
+ // but it should be a vector, otherwise.
+ auto *NewGEP = IsInBounds
+ ? State.Builder.CreateInBoundsGEP(
+ GEP->getSourceElementType(), Ptr, Indices)
+ : State.Builder.CreateGEP(GEP->getSourceElementType(),
+ Ptr, Indices);
+ assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
+ "NewGEP is not a pointer vector");
+ State.set(this, NewGEP, Part);
+ State.ILV->addMetadata(NewGEP, GEP);
+ }
+ }
}
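Why the 'inbounds' (and nuw/nsw/exact) flags are dropped for recipes in MayGeneratePoisonRecipes, shown with a scalar C++ analogue rather than IR: in the original loop the address is only formed under the guard, so the in-bounds assumption holds; once control flow is linearized the address is formed for every lane, and an inbounds GEP for a masked-off lane could yield poison that feeds a non-predicated user.

#include <cstddef>

int sumGuarded(const int *A, std::size_t N, std::size_t Stride) {
  int Sum = 0;
  for (std::size_t I = 0; I < N; ++I) {
    // Scalar loop: the address A + I * Stride is only computed when the
    // guard holds, which is what justified marking the GEP 'inbounds'.
    if (I * Stride < N)
      Sum += A[I * Stride];
  }
  // After vectorization the guard becomes a mask: addresses for all lanes
  // are computed unconditionally and only the loads are masked, so lanes
  // where I * Stride >= N would form an out-of-bounds address. An 'inbounds'
  // GEP there would be poison, hence the flag must be dropped.
  return Sum;
}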
void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
@@ -9867,8 +9821,8 @@ void VPReductionRecipe::execute(VPTransformState &State) {
void VPReplicateRecipe::execute(VPTransformState &State) {
if (State.Instance) { // Generate a single instance.
assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");
- State.ILV->scalarizeInstruction(getUnderlyingInstr(), this, *this,
- *State.Instance, IsPredicated, State);
+ State.ILV->scalarizeInstruction(getUnderlyingInstr(), this, *State.Instance,
+ IsPredicated, State);
// Insert scalar instance packing it into a vector.
if (AlsoPack && State.VF.isVector()) {
// If we're constructing lane 0, initialize to start from poison.
@@ -9891,7 +9845,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
"Can't scalarize a scalable vector");
for (unsigned Part = 0; Part < State.UF; ++Part)
for (unsigned Lane = 0; Lane < EndLane; ++Lane)
- State.ILV->scalarizeInstruction(getUnderlyingInstr(), this, *this,
+ State.ILV->scalarizeInstruction(getUnderlyingInstr(), this,
VPIteration(Part, Lane), IsPredicated,
State);
}
@@ -9970,9 +9924,129 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) {
void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
VPValue *StoredValue = isStore() ? getStoredValue() : nullptr;
- State.ILV->vectorizeMemoryInstruction(
- &Ingredient, State, StoredValue ? nullptr : getVPSingleValue(), getAddr(),
- StoredValue, getMask(), Consecutive, Reverse);
+
+ // Attempt to issue a wide load.
+ LoadInst *LI = dyn_cast<LoadInst>(&Ingredient);
+ StoreInst *SI = dyn_cast<StoreInst>(&Ingredient);
+
+ assert((LI || SI) && "Invalid Load/Store instruction");
+ assert((!SI || StoredValue) && "No stored value provided for widened store");
+ assert((!LI || !StoredValue) && "Stored value provided for widened load");
+
+ Type *ScalarDataTy = getLoadStoreType(&Ingredient);
+
+ auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
+ const Align Alignment = getLoadStoreAlignment(&Ingredient);
+ bool CreateGatherScatter = !Consecutive;
+
+ auto &Builder = State.Builder;
+ InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF);
+ bool isMaskRequired = getMask();
+ if (isMaskRequired)
+ for (unsigned Part = 0; Part < State.UF; ++Part)
+ BlockInMaskParts[Part] = State.get(getMask(), Part);
+
+ const auto CreateVecPtr = [&](unsigned Part, Value *Ptr) -> Value * {
+ // Calculate the pointer for the specific unroll-part.
+ GetElementPtrInst *PartPtr = nullptr;
+
+ bool InBounds = false;
+ if (auto *gep = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))
+ InBounds = gep->isInBounds();
+ if (Reverse) {
+ // If the address is consecutive but reversed, then the
+ // wide store needs to start at the last vector element.
+ // RunTimeVF = VScale * VF.getKnownMinValue()
+ // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
+ Value *RunTimeVF = getRuntimeVF(Builder, Builder.getInt32Ty(), State.VF);
+ // NumElt = -Part * RunTimeVF
+ Value *NumElt = Builder.CreateMul(Builder.getInt32(-Part), RunTimeVF);
+ // LastLane = 1 - RunTimeVF
+ Value *LastLane = Builder.CreateSub(Builder.getInt32(1), RunTimeVF);
+ PartPtr =
+ cast<GetElementPtrInst>(Builder.CreateGEP(ScalarDataTy, Ptr, NumElt));
+ PartPtr->setIsInBounds(InBounds);
+ PartPtr = cast<GetElementPtrInst>(
+ Builder.CreateGEP(ScalarDataTy, PartPtr, LastLane));
+ PartPtr->setIsInBounds(InBounds);
+ if (isMaskRequired) // Reverse of a null all-one mask is a null mask.
+ BlockInMaskParts[Part] =
+ Builder.CreateVectorReverse(BlockInMaskParts[Part], "reverse");
+ } else {
+ Value *Increment =
+ createStepForVF(Builder, Builder.getInt32Ty(), State.VF, Part);
+ PartPtr = cast<GetElementPtrInst>(
+ Builder.CreateGEP(ScalarDataTy, Ptr, Increment));
+ PartPtr->setIsInBounds(InBounds);
+ }
+
+ unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace();
+ return Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
+ };
+
+ // Handle Stores:
+ if (SI) {
+ State.ILV->setDebugLocFromInst(SI);
+
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Instruction *NewSI = nullptr;
+ Value *StoredVal = State.get(StoredValue, Part);
+ if (CreateGatherScatter) {
+ Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
+ Value *VectorGep = State.get(getAddr(), Part);
+ NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
+ MaskPart);
+ } else {
+ if (Reverse) {
+ // If we store to reverse consecutive memory locations, then we need
+ // to reverse the order of elements in the stored value.
+ StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
+ // We don't want to update the value in the map as it might be used in
+ // another expression. So don't call resetVectorValue(StoredVal).
+ }
+ auto *VecPtr =
+ CreateVecPtr(Part, State.get(getAddr(), VPIteration(0, 0)));
+ if (isMaskRequired)
+ NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
+ BlockInMaskParts[Part]);
+ else
+ NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
+ }
+ State.ILV->addMetadata(NewSI, SI);
+ }
+ return;
+ }
+
+ // Handle loads.
+ assert(LI && "Must have a load instruction");
+ State.ILV->setDebugLocFromInst(LI);
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Value *NewLI;
+ if (CreateGatherScatter) {
+ Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
+ Value *VectorGep = State.get(getAddr(), Part);
+ NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, MaskPart,
+ nullptr, "wide.masked.gather");
+ State.ILV->addMetadata(NewLI, LI);
+ } else {
+ auto *VecPtr =
+ CreateVecPtr(Part, State.get(getAddr(), VPIteration(0, 0)));
+ if (isMaskRequired)
+ NewLI = Builder.CreateMaskedLoad(
+ DataTy, VecPtr, Alignment, BlockInMaskParts[Part],
+ PoisonValue::get(DataTy), "wide.masked.load");
+ else
+ NewLI =
+ Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment, "wide.load");
+
+ // Add metadata to the load, but setVectorValue to the reverse shuffle.
+ State.ILV->addMetadata(NewLI, LI);
+ if (Reverse)
+ NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
+ }
+
+ State.set(getVPSingleValue(), NewLI, Part);
+ }
}
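The reverse-access address arithmetic in CreateVecPtr above can be checked in isolation: for unroll part Part of a VF-wide reversed access, the two GEP steps (NumElt = -Part * RunTimeVF, then LastLane = 1 - RunTimeVF) land VF * (Part + 1) - 1 elements before the current scalar pointer, and the loaded/stored vector is reversed separately. A small standalone check:

#include <cassert>
#include <cstdint>

int64_t reversePartOffset(int64_t Part, int64_t RunTimeVF) {
  int64_t NumElt = -Part * RunTimeVF;   // step back over earlier parts
  int64_t LastLane = 1 - RunTimeVF;     // then back to the chunk's first element
  return NumElt + LastLane;             // combined offset of the two GEPs
}

int main() {
  // With VF = 4: part 0 starts at offset -3, part 1 at -7, part 2 at -11.
  assert(reversePartOffset(0, 4) == -3);
  assert(reversePartOffset(1, 4) == -7);
  assert(reversePartOffset(2, 4) == -11);
  return 0;
}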
// Determine how to lower the scalar epilogue, which depends on 1) optimising
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e3ef0b794f68..95061e9053fa 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -283,6 +283,26 @@ static bool isCommutative(Instruction *I) {
return false;
}
+/// Checks if the given value is actually an undefined constant vector.
+static bool isUndefVector(const Value *V) {
+ if (isa<UndefValue>(V))
+ return true;
+ auto *C = dyn_cast<Constant>(V);
+ if (!C)
+ return false;
+ if (!C->containsUndefOrPoisonElement())
+ return false;
+ auto *VecTy = dyn_cast<FixedVectorType>(C->getType());
+ if (!VecTy)
+ return false;
+ for (unsigned I = 0, E = VecTy->getNumElements(); I != E; ++I) {
+ if (Constant *Elem = C->getAggregateElement(I))
+ if (!isa<UndefValue>(Elem))
+ return false;
+ }
+ return true;
+}
+
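A conceptual stand-in for isUndefVector (not LLVM code; the real helper also accepts a scalar UndefValue and rejects non-constants): a constant vector only counts as undef here if every lane is undef or poison, since a single defined lane makes it a real operand.

#include <optional>
#include <vector>

using LaneConst = std::optional<int>;   // nullopt models an undef/poison lane

bool isUndefVectorModel(const std::vector<LaneConst> &Lanes) {
  for (const LaneConst &L : Lanes)
    if (L.has_value())
      return false;                     // a defined element disqualifies it
  return true;                          // all lanes undef/poison
}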
/// Checks if the vector of instructions can be represented as a shuffle, like:
/// %x0 = extractelement <4 x i8> %x, i32 0
/// %x3 = extractelement <4 x i8> %x, i32 3
@@ -327,7 +347,11 @@ static bool isCommutative(Instruction *I) {
/// TargetTransformInfo::getInstructionThroughput?
static Optional<TargetTransformInfo::ShuffleKind>
isFixedVectorShuffle(ArrayRef<Value *> VL, SmallVectorImpl<int> &Mask) {
- auto *EI0 = cast<ExtractElementInst>(VL[0]);
+ const auto *It =
+ find_if(VL, [](Value *V) { return isa<ExtractElementInst>(V); });
+ if (It == VL.end())
+ return None;
+ auto *EI0 = cast<ExtractElementInst>(*It);
if (isa<ScalableVectorType>(EI0->getVectorOperandType()))
return None;
unsigned Size =
@@ -336,33 +360,41 @@ isFixedVectorShuffle(ArrayRef<Value *> VL, SmallVectorImpl<int> &Mask) {
Value *Vec2 = nullptr;
enum ShuffleMode { Unknown, Select, Permute };
ShuffleMode CommonShuffleMode = Unknown;
+ Mask.assign(VL.size(), UndefMaskElem);
for (unsigned I = 0, E = VL.size(); I < E; ++I) {
+ // Undef can be represented as an undef element in a vector.
+ if (isa<UndefValue>(VL[I]))
+ continue;
auto *EI = cast<ExtractElementInst>(VL[I]);
+ if (isa<ScalableVectorType>(EI->getVectorOperandType()))
+ return None;
auto *Vec = EI->getVectorOperand();
+ // We can extractelement from undef or poison vector.
+ if (isUndefVector(Vec))
+ continue;
// All vector operands must have the same number of vector elements.
if (cast<FixedVectorType>(Vec->getType())->getNumElements() != Size)
return None;
+ if (isa<UndefValue>(EI->getIndexOperand()))
+ continue;
auto *Idx = dyn_cast<ConstantInt>(EI->getIndexOperand());
if (!Idx)
return None;
// Undefined behavior if Idx is negative or >= Size.
- if (Idx->getValue().uge(Size)) {
- Mask.push_back(UndefMaskElem);
+ if (Idx->getValue().uge(Size))
continue;
- }
unsigned IntIdx = Idx->getValue().getZExtValue();
- Mask.push_back(IntIdx);
- // We can extractelement from undef or poison vector.
- if (isa<UndefValue>(Vec))
- continue;
+ Mask[I] = IntIdx;
// For correct shuffling we have to have at most 2 different vector operands
// in all extractelement instructions.
- if (!Vec1 || Vec1 == Vec)
+ if (!Vec1 || Vec1 == Vec) {
Vec1 = Vec;
- else if (!Vec2 || Vec2 == Vec)
+ } else if (!Vec2 || Vec2 == Vec) {
Vec2 = Vec;
- else
+ Mask[I] += Size;
+ } else {
return None;
+ }
if (CommonShuffleMode == Permute)
continue;
// If the extract index is not the same as the operation number, it is a
@@ -1680,6 +1712,28 @@ private:
return IsSame(Scalars, ReuseShuffleIndices);
}
+ /// \returns true if current entry has same operands as \p TE.
+ bool hasEqualOperands(const TreeEntry &TE) const {
+ if (TE.getNumOperands() != getNumOperands())
+ return false;
+ SmallBitVector Used(getNumOperands());
+ for (unsigned I = 0, E = getNumOperands(); I < E; ++I) {
+ unsigned PrevCount = Used.count();
+ for (unsigned K = 0; K < E; ++K) {
+ if (Used.test(K))
+ continue;
+ if (getOperand(K) == TE.getOperand(I)) {
+ Used.set(K);
+ break;
+ }
+ }
+ // Check if we actually found the matching operand.
+ if (PrevCount == Used.count())
+ return false;
+ }
+ return true;
+ }
+
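hasEqualOperands above matches the two nodes' operand lists as multisets: each operand of TE must be paired with a distinct, not-yet-used operand of the current entry (the PrevCount comparison just detects that no unused slot matched). A standalone sketch of the same check, with ints standing in for operand lists:

#include <cstddef>
#include <vector>

bool hasEqualOperandsModel(const std::vector<int> &Mine,
                           const std::vector<int> &Theirs) {
  if (Mine.size() != Theirs.size())
    return false;
  std::vector<bool> Used(Mine.size(), false);
  for (int Op : Theirs) {
    bool Matched = false;
    for (std::size_t K = 0; K < Mine.size(); ++K) {
      if (!Used[K] && Mine[K] == Op) {
        Used[K] = true;                // consume each operand at most once
        Matched = true;
        break;
      }
    }
    if (!Matched)
      return false;                    // no unused operand of ours matches
  }
  return true;
}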
/// \return Final vectorization factor for the node. Defined by the total
/// number of vectorized scalars, including those, used several times in the
/// entry and counted in the \a ReuseShuffleIndices, if any.
@@ -1773,6 +1827,12 @@ private:
return Operands[OpIdx];
}
+ /// \returns the \p OpIdx operand of this TreeEntry.
+ ArrayRef<Value *> getOperand(unsigned OpIdx) const {
+ assert(OpIdx < Operands.size() && "Off bounds");
+ return Operands[OpIdx];
+ }
+
/// \returns the number of operands.
unsigned getNumOperands() const { return Operands.size(); }
@@ -2078,7 +2138,7 @@ private:
SmallPtrSet<const Value *, 32> EphValues;
/// Holds all of the instructions that we gathered.
- SetVector<Instruction *> GatherSeq;
+ SetVector<Instruction *> GatherShuffleSeq;
/// A list of blocks that we are going to CSE.
SetVector<BasicBlock *> CSEBlocks;
@@ -4386,15 +4446,19 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
bool IsGather) {
DenseMap<Value *, int> ExtractVectorsTys;
for (auto *V : VL) {
+ if (isa<UndefValue>(V))
+ continue;
// If all users of instruction are going to be vectorized and this
// instruction itself is not going to be vectorized, consider this
// instruction as dead and remove its cost from the final cost of the
// vectorized tree.
- if (!areAllUsersVectorized(cast<Instruction>(V), VectorizedVals) ||
- (IsGather && ScalarToTreeEntry.count(V)))
+ if (!areAllUsersVectorized(cast<Instruction>(V), VectorizedVals))
continue;
auto *EE = cast<ExtractElementInst>(V);
- unsigned Idx = *getExtractIndex(EE);
+ Optional<unsigned> EEIdx = getExtractIndex(EE);
+ if (!EEIdx)
+ continue;
+ unsigned Idx = *EEIdx;
if (TTIRef.getNumberOfParts(VecTy) !=
TTIRef.getNumberOfParts(EE->getVectorOperandType())) {
auto It =
@@ -4426,6 +4490,8 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
for (const auto &Data : ExtractVectorsTys) {
auto *EEVTy = cast<FixedVectorType>(Data.first->getType());
unsigned NumElts = VecTy->getNumElements();
+ if (Data.second % NumElts == 0)
+ continue;
if (TTIRef.getNumberOfParts(EEVTy) > TTIRef.getNumberOfParts(VecTy)) {
unsigned Idx = (Data.second / NumElts) * NumElts;
unsigned EENumElts = EEVTy->getNumElements();
@@ -4488,10 +4554,12 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
// broadcast.
return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy);
}
- if (E->getOpcode() == Instruction::ExtractElement && allSameType(VL) &&
- allSameBlock(VL) &&
- !isa<ScalableVectorType>(
- cast<ExtractElementInst>(E->getMainOp())->getVectorOperandType())) {
+ if ((E->getOpcode() == Instruction::ExtractElement ||
+ all_of(E->Scalars,
+ [](Value *V) {
+ return isa<ExtractElementInst, UndefValue>(V);
+ })) &&
+ allSameType(VL)) {
// Check that gather of extractelements can be represented as just a
// shuffle of a single/two vectors the scalars are extracted from.
SmallVector<int> Mask;
@@ -4738,7 +4806,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
return !is_contained(E->Scalars,
cast<Instruction>(V)->getOperand(0));
}));
- if (isa<UndefValue>(FirstInsert->getOperand(0))) {
+ if (isUndefVector(FirstInsert->getOperand(0))) {
Cost += TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, SrcVecTy, Mask);
} else {
SmallVector<int> InsertMask(NumElts);
@@ -5016,7 +5084,30 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
// VecCost is equal to sum of the cost of creating 2 vectors
// and the cost of creating shuffle.
InstructionCost VecCost = 0;
- if (Instruction::isBinaryOp(E->getOpcode())) {
+ // Try to find the previous shuffle node with the same operands and same
+ // main/alternate ops.
+ auto &&TryFindNodeWithEqualOperands = [this, E]() {
+ for (const std::unique_ptr<TreeEntry> &TE : VectorizableTree) {
+ if (TE.get() == E)
+ break;
+ if (TE->isAltShuffle() &&
+ ((TE->getOpcode() == E->getOpcode() &&
+ TE->getAltOpcode() == E->getAltOpcode()) ||
+ (TE->getOpcode() == E->getAltOpcode() &&
+ TE->getAltOpcode() == E->getOpcode())) &&
+ TE->hasEqualOperands(*E))
+ return true;
+ }
+ return false;
+ };
+ if (TryFindNodeWithEqualOperands()) {
+ LLVM_DEBUG({
+ dbgs() << "SLP: diamond match for alternate node found.\n";
+ E->dump();
+ });
+        // No need to add new vector costs here since we're going to reuse
+        // the same main/alternate vector ops, just do different shuffling.
+ } else if (Instruction::isBinaryOp(E->getOpcode())) {
VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy,
CostKind);
@@ -5060,7 +5151,11 @@ bool BoUpSLP::isFullyVectorizableTinyTree(bool ForReduction) const {
[this](Value *V) { return EphValues.contains(V); }) &&
(allConstant(TE->Scalars) || isSplat(TE->Scalars) ||
TE->Scalars.size() < Limit ||
- (TE->getOpcode() == Instruction::ExtractElement &&
+ ((TE->getOpcode() == Instruction::ExtractElement ||
+ all_of(TE->Scalars,
+ [](Value *V) {
+ return isa<ExtractElementInst, UndefValue>(V);
+ })) &&
isFixedVectorShuffle(TE->Scalars, Mask)) ||
(TE->State == TreeEntry::NeedToGather &&
TE->getOpcode() == Instruction::Load && !TE->isAltShuffle()));
@@ -5280,6 +5375,42 @@ InstructionCost BoUpSLP::getSpillCost() const {
return Cost;
}
+/// Check if two insertelement instructions are from the same buildvector.
+static bool areTwoInsertFromSameBuildVector(InsertElementInst *VU,
+ InsertElementInst *V) {
+ // Instructions must be from the same basic blocks.
+ if (VU->getParent() != V->getParent())
+ return false;
+ // Checks if 2 insertelements are from the same buildvector.
+ if (VU->getType() != V->getType())
+ return false;
+  // Inserts with multiple uses are treated as separate nodes.
+ if (!VU->hasOneUse() && !V->hasOneUse())
+ return false;
+ auto *IE1 = VU;
+ auto *IE2 = V;
+ // Go through the vector operand of insertelement instructions trying to find
+ // either VU as the original vector for IE2 or V as the original vector for
+ // IE1.
+ do {
+ if (IE2 == VU || IE1 == V)
+ return true;
+ if (IE1) {
+ if (IE1 != VU && !IE1->hasOneUse())
+ IE1 = nullptr;
+ else
+ IE1 = dyn_cast<InsertElementInst>(IE1->getOperand(0));
+ }
+ if (IE2) {
+ if (IE2 != V && !IE2->hasOneUse())
+ IE2 = nullptr;
+ else
+ IE2 = dyn_cast<InsertElementInst>(IE2->getOperand(0));
+ }
+ } while (IE1 || IE2);
+ return false;
+}
+
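areTwoInsertFromSameBuildVector walks the two insertelement chains in lockstep through their vector operands, giving up on a chain as soon as it passes through a multi-use insert, and succeeds when one chain reaches the other instruction. A standalone model of just that walk (the type/parent/one-use preconditions are omitted):

struct Insert {              // stand-in for an InsertElementInst
  Insert *Base = nullptr;    // vector operand: previous insert in the chain,
                             // or nullptr if it is not an insertelement
  bool HasOneUse = true;
};

bool fromSameBuildVectorModel(Insert *VU, Insert *V) {
  Insert *IE1 = VU, *IE2 = V;
  do {
    if (IE2 == VU || IE1 == V)
      return true;                                  // the chains meet
    if (IE1)
      IE1 = (IE1 != VU && !IE1->HasOneUse) ? nullptr : IE1->Base;
    if (IE2)
      IE2 = (IE2 != V && !IE2->HasOneUse) ? nullptr : IE2->Base;
  } while (IE1 || IE2);
  return false;
}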
InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
InstructionCost Cost = 0;
LLVM_DEBUG(dbgs() << "SLP: Calculating cost for tree of size "
@@ -5306,7 +5437,8 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
SmallVector<APInt> DemandedElts;
for (ExternalUser &EU : ExternalUses) {
// We only add extract cost once for the same scalar.
- if (!ExtractCostCalculated.insert(EU.Scalar).second)
+ if (!isa_and_nonnull<InsertElementInst>(EU.User) &&
+ !ExtractCostCalculated.insert(EU.Scalar).second)
continue;
// Uses by ephemeral values are free (because the ephemeral value will be
@@ -5326,35 +5458,35 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
// If found user is an insertelement, do not calculate extract cost but try
// to detect it as a final shuffled/identity match.
- if (isa_and_nonnull<InsertElementInst>(EU.User)) {
- if (auto *FTy = dyn_cast<FixedVectorType>(EU.User->getType())) {
- Optional<int> InsertIdx = getInsertIndex(EU.User, 0);
+ if (auto *VU = dyn_cast_or_null<InsertElementInst>(EU.User)) {
+ if (auto *FTy = dyn_cast<FixedVectorType>(VU->getType())) {
+ Optional<int> InsertIdx = getInsertIndex(VU, 0);
if (!InsertIdx || *InsertIdx == UndefMaskElem)
continue;
- Value *VU = EU.User;
auto *It = find_if(FirstUsers, [VU](Value *V) {
- // Checks if 2 insertelements are from the same buildvector.
- if (VU->getType() != V->getType())
- return false;
- auto *IE1 = cast<InsertElementInst>(VU);
- auto *IE2 = cast<InsertElementInst>(V);
- // Go through of insertelement instructions trying to find either VU
- // as the original vector for IE2 or V as the original vector for IE1.
- do {
- if (IE1 == VU || IE2 == V)
- return true;
- if (IE1)
- IE1 = dyn_cast<InsertElementInst>(IE1->getOperand(0));
- if (IE2)
- IE2 = dyn_cast<InsertElementInst>(IE2->getOperand(0));
- } while (IE1 || IE2);
- return false;
+ return areTwoInsertFromSameBuildVector(VU,
+ cast<InsertElementInst>(V));
});
int VecId = -1;
if (It == FirstUsers.end()) {
VF.push_back(FTy->getNumElements());
ShuffleMask.emplace_back(VF.back(), UndefMaskElem);
- FirstUsers.push_back(EU.User);
+ // Find the insertvector, vectorized in tree, if any.
+ Value *Base = VU;
+ while (isa<InsertElementInst>(Base)) {
+ // Build the mask for the vectorized insertelement instructions.
+ if (const TreeEntry *E = getTreeEntry(Base)) {
+ VU = cast<InsertElementInst>(Base);
+ do {
+ int Idx = E->findLaneForValue(Base);
+ ShuffleMask.back()[Idx] = Idx;
+ Base = cast<InsertElementInst>(Base)->getOperand(0);
+ } while (E == getTreeEntry(Base));
+ break;
+ }
+ Base = cast<InsertElementInst>(Base)->getOperand(0);
+ }
+ FirstUsers.push_back(VU);
DemandedElts.push_back(APInt::getZero(VF.back()));
VecId = FirstUsers.size() - 1;
} else {
@@ -5363,6 +5495,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
int Idx = *InsertIdx;
ShuffleMask[VecId][Idx] = EU.Lane;
DemandedElts[VecId].setBit(Idx);
+ continue;
}
}
@@ -5386,47 +5519,86 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
InstructionCost SpillCost = getSpillCost();
Cost += SpillCost + ExtractCost;
- for (int I = 0, E = FirstUsers.size(); I < E; ++I) {
- // For the very first element - simple shuffle of the source vector.
- int Limit = ShuffleMask[I].size() * 2;
- if (I == 0 &&
- all_of(ShuffleMask[I], [Limit](int Idx) { return Idx < Limit; }) &&
- !ShuffleVectorInst::isIdentityMask(ShuffleMask[I])) {
+ if (FirstUsers.size() == 1) {
+ int Limit = ShuffleMask.front().size() * 2;
+ if (all_of(ShuffleMask.front(), [Limit](int Idx) { return Idx < Limit; }) &&
+ !ShuffleVectorInst::isIdentityMask(ShuffleMask.front())) {
InstructionCost C = TTI->getShuffleCost(
TTI::SK_PermuteSingleSrc,
- cast<FixedVectorType>(FirstUsers[I]->getType()), ShuffleMask[I]);
+ cast<FixedVectorType>(FirstUsers.front()->getType()),
+ ShuffleMask.front());
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
<< " for final shuffle of insertelement external users "
<< *VectorizableTree.front()->Scalars.front() << ".\n"
<< "SLP: Current total cost = " << Cost << "\n");
Cost += C;
- continue;
}
- // Other elements - permutation of 2 vectors (the initial one and the next
- // Ith incoming vector).
- unsigned VF = ShuffleMask[I].size();
- for (unsigned Idx = 0; Idx < VF; ++Idx) {
- int &Mask = ShuffleMask[I][Idx];
- Mask = Mask == UndefMaskElem ? Idx : VF + Mask;
- }
- InstructionCost C = TTI->getShuffleCost(
- TTI::SK_PermuteTwoSrc, cast<FixedVectorType>(FirstUsers[I]->getType()),
- ShuffleMask[I]);
- LLVM_DEBUG(
- dbgs()
- << "SLP: Adding cost " << C
- << " for final shuffle of vector node and external insertelement users "
- << *VectorizableTree.front()->Scalars.front() << ".\n"
- << "SLP: Current total cost = " << Cost << "\n");
- Cost += C;
InstructionCost InsertCost = TTI->getScalarizationOverhead(
- cast<FixedVectorType>(FirstUsers[I]->getType()), DemandedElts[I],
- /*Insert*/ true,
- /*Extract*/ false);
+ cast<FixedVectorType>(FirstUsers.front()->getType()),
+ DemandedElts.front(), /*Insert*/ true, /*Extract*/ false);
+ LLVM_DEBUG(dbgs() << "SLP: subtracting the cost " << InsertCost
+ << " for insertelements gather.\n"
+ << "SLP: Current total cost = " << Cost << "\n");
Cost -= InsertCost;
+ } else if (FirstUsers.size() >= 2) {
+ unsigned MaxVF = *std::max_element(VF.begin(), VF.end());
+ // Combined masks of the first 2 vectors.
+ SmallVector<int> CombinedMask(MaxVF, UndefMaskElem);
+ copy(ShuffleMask.front(), CombinedMask.begin());
+ APInt CombinedDemandedElts = DemandedElts.front().zextOrSelf(MaxVF);
+ auto *VecTy = FixedVectorType::get(
+ cast<VectorType>(FirstUsers.front()->getType())->getElementType(),
+ MaxVF);
+ for (int I = 0, E = ShuffleMask[1].size(); I < E; ++I) {
+ if (ShuffleMask[1][I] != UndefMaskElem) {
+ CombinedMask[I] = ShuffleMask[1][I] + MaxVF;
+ CombinedDemandedElts.setBit(I);
+ }
+ }
+ InstructionCost C =
+ TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, CombinedMask);
+ LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
+ << " for final shuffle of vector node and external "
+ "insertelement users "
+ << *VectorizableTree.front()->Scalars.front() << ".\n"
+ << "SLP: Current total cost = " << Cost << "\n");
+ Cost += C;
+ InstructionCost InsertCost = TTI->getScalarizationOverhead(
+ VecTy, CombinedDemandedElts, /*Insert*/ true, /*Extract*/ false);
LLVM_DEBUG(dbgs() << "SLP: subtracting the cost " << InsertCost
<< " for insertelements gather.\n"
<< "SLP: Current total cost = " << Cost << "\n");
+ Cost -= InsertCost;
+ for (int I = 2, E = FirstUsers.size(); I < E; ++I) {
+ // Other elements - permutation of 2 vectors (the initial one and the
+ // Ith incoming vector).
+ unsigned VF = ShuffleMask[I].size();
+ for (unsigned Idx = 0; Idx < VF; ++Idx) {
+ int Mask = ShuffleMask[I][Idx];
+ if (Mask != UndefMaskElem)
+ CombinedMask[Idx] = MaxVF + Mask;
+ else if (CombinedMask[Idx] != UndefMaskElem)
+ CombinedMask[Idx] = Idx;
+ }
+ for (unsigned Idx = VF; Idx < MaxVF; ++Idx)
+ if (CombinedMask[Idx] != UndefMaskElem)
+ CombinedMask[Idx] = Idx;
+ InstructionCost C =
+ TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, CombinedMask);
+ LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
+ << " for final shuffle of vector node and external "
+ "insertelement users "
+ << *VectorizableTree.front()->Scalars.front() << ".\n"
+ << "SLP: Current total cost = " << Cost << "\n");
+ Cost += C;
+ InstructionCost InsertCost = TTI->getScalarizationOverhead(
+ cast<FixedVectorType>(FirstUsers[I]->getType()), DemandedElts[I],
+ /*Insert*/ true, /*Extract*/ false);
+ LLVM_DEBUG(dbgs() << "SLP: subtracting the cost " << InsertCost
+ << " for insertelements gather.\n"
+ << "SLP: Current total cost = " << Cost << "\n");
+ Cost -= InsertCost;
+ }
}
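
The two-source mask folding above can be shown on plain integer masks. The sketch below is not LLVM code: combineMasks and the sample masks are invented, -1 stands in for UndefMaskElem, and offsetting the second mask by MaxVF yields the layout that SK_PermuteTwoSrc expects.

#include <cassert>
#include <vector>

// Fold two per-vector insert masks into one two-source permute mask.
static std::vector<int> combineMasks(const std::vector<int> &M0,
                                     const std::vector<int> &M1,
                                     unsigned MaxVF) {
  std::vector<int> Combined(MaxVF, -1);
  // Lanes coming from the first source keep their index.
  for (unsigned I = 0, E = M0.size(); I < E; ++I)
    Combined[I] = M0[I];
  // Lanes coming from the second source are offset by MaxVF.
  for (unsigned I = 0, E = M1.size(); I < E; ++I)
    if (M1[I] != -1)
      Combined[I] = M1[I] + MaxVF;
  return Combined;
}

int main() {
  // First user fills lanes 0 and 1, second user fills lanes 2 and 3.
  std::vector<int> M0 = {0, 1, -1, -1};
  std::vector<int> M1 = {-1, -1, 2, 3};
  std::vector<int> C = combineMasks(M0, M1, 4);
  // Lanes 2 and 3 now select elements 2 and 3 of the second source (4 + 2 and
  // 4 + 3), i.e. a single two-source permute.
  assert((C == std::vector<int>{0, 1, 6, 7}));
  return 0;
}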
#ifndef NDEBUG
@@ -5728,7 +5900,7 @@ Value *BoUpSLP::gather(ArrayRef<Value *> VL) {
auto *InsElt = dyn_cast<InsertElementInst>(Vec);
if (!InsElt)
return Vec;
- GatherSeq.insert(InsElt);
+ GatherShuffleSeq.insert(InsElt);
CSEBlocks.insert(InsElt->getParent());
// Add to our 'need-to-extract' list.
if (TreeEntry *Entry = getTreeEntry(V)) {
@@ -5771,10 +5943,17 @@ class ShuffleInstructionBuilder {
const unsigned VF = 0;
bool IsFinalized = false;
SmallVector<int, 4> Mask;
+ /// Holds all of the instructions that we gathered.
+ SetVector<Instruction *> &GatherShuffleSeq;
+ /// A list of blocks that we are going to CSE.
+ SetVector<BasicBlock *> &CSEBlocks;
public:
- ShuffleInstructionBuilder(IRBuilderBase &Builder, unsigned VF)
- : Builder(Builder), VF(VF) {}
+ ShuffleInstructionBuilder(IRBuilderBase &Builder, unsigned VF,
+ SetVector<Instruction *> &GatherShuffleSeq,
+ SetVector<BasicBlock *> &CSEBlocks)
+ : Builder(Builder), VF(VF), GatherShuffleSeq(GatherShuffleSeq),
+ CSEBlocks(CSEBlocks) {}
/// Adds a mask, inverting it before applying.
void addInversedMask(ArrayRef<unsigned> SubMask) {
@@ -5804,7 +5983,12 @@ public:
if (VF == ValueVF && ShuffleVectorInst::isIdentityMask(Mask))
return V;
- return Builder.CreateShuffleVector(V, Mask, "shuffle");
+ Value *Vec = Builder.CreateShuffleVector(V, Mask, "shuffle");
+ if (auto *I = dyn_cast<Instruction>(Vec)) {
+ GatherShuffleSeq.insert(I);
+ CSEBlocks.insert(I->getParent());
+ }
+ return Vec;
}
~ShuffleInstructionBuilder() {
@@ -5862,6 +6046,10 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
std::iota(UniformMask.begin(), UniformMask.end(), 0);
V = Builder.CreateShuffleVector(V, UniformMask, "shrink.shuffle");
}
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ GatherShuffleSeq.insert(I);
+ CSEBlocks.insert(I->getParent());
+ }
}
return V;
}
@@ -5909,15 +6097,12 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
VL = UniqueValues;
}
- ShuffleInstructionBuilder ShuffleBuilder(Builder, VF);
+ ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleSeq,
+ CSEBlocks);
Value *Vec = gather(VL);
if (!ReuseShuffleIndicies.empty()) {
ShuffleBuilder.addMask(ReuseShuffleIndicies);
Vec = ShuffleBuilder.finalize(Vec);
- if (auto *I = dyn_cast<Instruction>(Vec)) {
- GatherSeq.insert(I);
- CSEBlocks.insert(I->getParent());
- }
}
return Vec;
}
@@ -5932,7 +6117,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
unsigned VF = E->getVectorFactor();
- ShuffleInstructionBuilder ShuffleBuilder(Builder, VF);
+ ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleSeq,
+ CSEBlocks);
if (E->State == TreeEntry::NeedToGather) {
if (E->getMainOp())
setInsertPointAfterBundle(E);
@@ -5946,16 +6132,16 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
"Expected shuffle of 1 or 2 entries.");
Vec = Builder.CreateShuffleVector(Entries.front()->VectorizedValue,
Entries.back()->VectorizedValue, Mask);
+ if (auto *I = dyn_cast<Instruction>(Vec)) {
+ GatherShuffleSeq.insert(I);
+ CSEBlocks.insert(I->getParent());
+ }
} else {
Vec = gather(E->Scalars);
}
if (NeedToShuffleReuses) {
ShuffleBuilder.addMask(E->ReuseShuffleIndices);
Vec = ShuffleBuilder.finalize(Vec);
- if (auto *I = dyn_cast<Instruction>(Vec)) {
- GatherSeq.insert(I);
- CSEBlocks.insert(I->getParent());
- }
}
E->VectorizedValue = Vec;
return Vec;
@@ -6072,11 +6258,16 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
IsIdentity &= *InsertIdx - Offset == I;
Mask[*InsertIdx - Offset] = I;
}
- if (!IsIdentity || NumElts != NumScalars)
+ if (!IsIdentity || NumElts != NumScalars) {
V = Builder.CreateShuffleVector(V, Mask);
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ GatherShuffleSeq.insert(I);
+ CSEBlocks.insert(I->getParent());
+ }
+ }
if ((!IsIdentity || Offset != 0 ||
- !isa<UndefValue>(FirstInsert->getOperand(0))) &&
+ !isUndefVector(FirstInsert->getOperand(0))) &&
NumElts != NumScalars) {
SmallVector<int> InsertMask(NumElts);
std::iota(InsertMask.begin(), InsertMask.end(), 0);
@@ -6088,6 +6279,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
V = Builder.CreateShuffleVector(
FirstInsert->getOperand(0), V, InsertMask,
cast<Instruction>(E->Scalars.back())->getName());
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ GatherShuffleSeq.insert(I);
+ CSEBlocks.insert(I->getParent());
+ }
}
++NumVectorInstructions;
@@ -6444,6 +6639,14 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
V1 = Builder.CreateCast(
static_cast<Instruction::CastOps>(E->getAltOpcode()), LHS, VecTy);
}
+ // Add V0 and V1 to the later analysis to try to find and remove a matching
+ // instruction, if any.
+ for (Value *V : {V0, V1}) {
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ GatherShuffleSeq.insert(I);
+ CSEBlocks.insert(I->getParent());
+ }
+ }
// Create shuffle to take alternate operations from the vector.
// Also, gather up main and alt scalar ops to propagate IR flags to
@@ -6462,8 +6665,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
propagateIRFlags(V1, AltScalars);
Value *V = Builder.CreateShuffleVector(V0, V1, Mask);
- if (Instruction *I = dyn_cast<Instruction>(V))
+ if (auto *I = dyn_cast<Instruction>(V)) {
V = propagateMetadata(I, E->Scalars);
+ GatherShuffleSeq.insert(I);
+ CSEBlocks.insert(I->getParent());
+ }
V = ShuffleBuilder.finalize(V);
E->VectorizedValue = V;
@@ -6657,10 +6863,10 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
}
void BoUpSLP::optimizeGatherSequence() {
- LLVM_DEBUG(dbgs() << "SLP: Optimizing " << GatherSeq.size()
+ LLVM_DEBUG(dbgs() << "SLP: Optimizing " << GatherShuffleSeq.size()
<< " gather sequences instructions.\n");
// LICM InsertElementInst sequences.
- for (Instruction *I : GatherSeq) {
+ for (Instruction *I : GatherShuffleSeq) {
if (isDeleted(I))
continue;
@@ -6677,11 +6883,10 @@ void BoUpSLP::optimizeGatherSequence() {
// If the vector or the element that we insert into it are
// instructions that are defined in this basic block then we can't
// hoist this instruction.
- auto *Op0 = dyn_cast<Instruction>(I->getOperand(0));
- auto *Op1 = dyn_cast<Instruction>(I->getOperand(1));
- if (Op0 && L->contains(Op0))
- continue;
- if (Op1 && L->contains(Op1))
+ if (any_of(I->operands(), [L](Value *V) {
+ auto *OpI = dyn_cast<Instruction>(V);
+ return OpI && L->contains(OpI);
+ }))
continue;
// We can hoist this instruction. Move it to the pre-header.
@@ -6705,7 +6910,50 @@ void BoUpSLP::optimizeGatherSequence() {
return A->getDFSNumIn() < B->getDFSNumIn();
});
- // Perform O(N^2) search over the gather sequences and merge identical
+ // Less defined shuffles can be replaced by the more defined copies.
+ // Between two shuffles, one is less defined if it has the same vector
+ // operands and each of its mask indices is either undef or equal to the
+ // corresponding index in the other mask. E.g.
+ // shuffle %0, poison, <0, 0, 0, undef> is less defined than shuffle %0,
+ // poison, <0, 0, 0, 0>.
+ auto &&IsIdenticalOrLessDefined = [this](Instruction *I1, Instruction *I2,
+ SmallVectorImpl<int> &NewMask) {
+ if (I1->getType() != I2->getType())
+ return false;
+ auto *SI1 = dyn_cast<ShuffleVectorInst>(I1);
+ auto *SI2 = dyn_cast<ShuffleVectorInst>(I2);
+ if (!SI1 || !SI2)
+ return I1->isIdenticalTo(I2);
+ if (SI1->isIdenticalTo(SI2))
+ return true;
+ for (int I = 0, E = SI1->getNumOperands(); I < E; ++I)
+ if (SI1->getOperand(I) != SI2->getOperand(I))
+ return false;
+ // Check if the second instruction is more defined than the first one.
+ NewMask.assign(SI2->getShuffleMask().begin(), SI2->getShuffleMask().end());
+ ArrayRef<int> SM1 = SI1->getShuffleMask();
+ // Count trailing undefs in the mask to check the final number of used
+ // registers.
+ unsigned LastUndefsCnt = 0;
+ for (int I = 0, E = NewMask.size(); I < E; ++I) {
+ if (SM1[I] == UndefMaskElem)
+ ++LastUndefsCnt;
+ else
+ LastUndefsCnt = 0;
+ if (NewMask[I] != UndefMaskElem && SM1[I] != UndefMaskElem &&
+ NewMask[I] != SM1[I])
+ return false;
+ if (NewMask[I] == UndefMaskElem)
+ NewMask[I] = SM1[I];
+ }
+ // Check if the last undefs actually change the final number of used vector
+ // registers.
+ return SM1.size() - LastUndefsCnt > 1 &&
+ TTI->getNumberOfParts(SI1->getType()) ==
+ TTI->getNumberOfParts(
+ FixedVectorType::get(SI1->getType()->getElementType(),
+ SM1.size() - LastUndefsCnt));
+ };
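
A minimal standalone model of the mask part of IsIdenticalOrLessDefined, assuming the two shuffles already have identical operands; it omits the trailing-undef/getNumberOfParts register check that the real lambda also applies. The function name and test values are invented for illustration; this is not LLVM code.

#include <cassert>
#include <cstddef>
#include <vector>

// Returns true if the shuffle carrying Mask1 can be replaced by the one
// carrying Mask2 (operands assumed identical): wherever both masks are
// defined they must agree. On success, NewMask holds Mask2 with its undef
// lanes (-1) filled in from Mask1.
static bool isIdenticalOrLessDefined(const std::vector<int> &Mask1,
                                     const std::vector<int> &Mask2,
                                     std::vector<int> &NewMask) {
  if (Mask1.size() != Mask2.size())
    return false;
  NewMask = Mask2;
  for (std::size_t I = 0, E = NewMask.size(); I < E; ++I) {
    if (NewMask[I] != -1 && Mask1[I] != -1 && NewMask[I] != Mask1[I])
      return false;
    if (NewMask[I] == -1)
      NewMask[I] = Mask1[I];
  }
  return true;
}

int main() {
  std::vector<int> New;
  // <0, 0, 0, undef> is less defined than <0, 0, 0, 0>: replaceable.
  assert(isIdenticalOrLessDefined({0, 0, 0, -1}, {0, 0, 0, 0}, New));
  assert((New == std::vector<int>{0, 0, 0, 0}));
  // <0, 1, 0, 0> conflicts with <0, 0, 0, 0> in lane 1: not replaceable.
  assert(!isIdenticalOrLessDefined({0, 1, 0, 0}, {0, 0, 0, 0}, New));
  return 0;
}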
+ // Perform O(N^2) search over the gather/shuffle sequences and merge identical
// instructions. TODO: We can further optimize this scan if we split the
// instructions into different buckets based on the insert lane.
SmallVector<Instruction *, 16> Visited;
@@ -6719,17 +6967,35 @@ void BoUpSLP::optimizeGatherSequence() {
if (isDeleted(&In))
continue;
if (!isa<InsertElementInst>(&In) && !isa<ExtractElementInst>(&In) &&
- !isa<ShuffleVectorInst>(&In))
+ !isa<ShuffleVectorInst>(&In) && !GatherShuffleSeq.contains(&In))
continue;
// Check if we can replace this instruction with any of the
// visited instructions.
bool Replaced = false;
- for (Instruction *v : Visited) {
- if (In.isIdenticalTo(v) &&
- DT->dominates(v->getParent(), In.getParent())) {
- In.replaceAllUsesWith(v);
+ for (Instruction *&V : Visited) {
+ SmallVector<int> NewMask;
+ if (IsIdenticalOrLessDefined(&In, V, NewMask) &&
+ DT->dominates(V->getParent(), In.getParent())) {
+ In.replaceAllUsesWith(V);
eraseInstruction(&In);
+ if (auto *SI = dyn_cast<ShuffleVectorInst>(V))
+ if (!NewMask.empty())
+ SI->setShuffleMask(NewMask);
+ Replaced = true;
+ break;
+ }
+ if (isa<ShuffleVectorInst>(In) && isa<ShuffleVectorInst>(V) &&
+ GatherShuffleSeq.contains(V) &&
+ IsIdenticalOrLessDefined(V, &In, NewMask) &&
+ DT->dominates(In.getParent(), V->getParent())) {
+ In.moveAfter(V);
+ V->replaceAllUsesWith(&In);
+ eraseInstruction(V);
+ if (auto *SI = dyn_cast<ShuffleVectorInst>(&In))
+ if (!NewMask.empty())
+ SI->setShuffleMask(NewMask);
+ V = &In;
Replaced = true;
break;
}
@@ -6741,7 +7007,7 @@ void BoUpSLP::optimizeGatherSequence() {
}
}
CSEBlocks.clear();
- GatherSeq.clear();
+ GatherShuffleSeq.clear();
}
// Groups the instructions to a bundle (which is then a single scheduling entity)
@@ -8791,6 +9057,8 @@ private:
assert(VectorizedValue && "Need to have a vectorized tree node");
assert(isPowerOf2_32(ReduxWidth) &&
"We only handle power-of-two reductions for now");
+ assert(RdxKind != RecurKind::FMulAdd &&
+ "A call to the llvm.fmuladd intrinsic is not handled yet");
++NumVectorInstructions;
return createSimpleTargetReduction(Builder, TTI, VectorizedValue, RdxKind,
@@ -9123,8 +9391,9 @@ bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
SmallVector<Value *, 16> BuildVectorOpds;
SmallVector<int> Mask;
if (!findBuildAggregate(IEI, TTI, BuildVectorOpds, BuildVectorInsts) ||
- (llvm::all_of(BuildVectorOpds,
- [](Value *V) { return isa<ExtractElementInst>(V); }) &&
+ (llvm::all_of(
+ BuildVectorOpds,
+ [](Value *V) { return isa<ExtractElementInst, UndefValue>(V); }) &&
isFixedVectorShuffle(BuildVectorOpds, Mask)))
return false;
@@ -9132,44 +9401,6 @@ bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
return tryToVectorizeList(BuildVectorInsts, R);
}
-bool SLPVectorizerPass::vectorizeSimpleInstructions(
- SmallVectorImpl<Instruction *> &Instructions, BasicBlock *BB, BoUpSLP &R,
- bool AtTerminator) {
- bool OpsChanged = false;
- SmallVector<Instruction *, 4> PostponedCmps;
- for (auto *I : reverse(Instructions)) {
- if (R.isDeleted(I))
- continue;
- if (auto *LastInsertValue = dyn_cast<InsertValueInst>(I))
- OpsChanged |= vectorizeInsertValueInst(LastInsertValue, BB, R);
- else if (auto *LastInsertElem = dyn_cast<InsertElementInst>(I))
- OpsChanged |= vectorizeInsertElementInst(LastInsertElem, BB, R);
- else if (isa<CmpInst>(I))
- PostponedCmps.push_back(I);
- }
- if (AtTerminator) {
- // Try to find reductions first.
- for (Instruction *I : PostponedCmps) {
- if (R.isDeleted(I))
- continue;
- for (Value *Op : I->operands())
- OpsChanged |= vectorizeRootInstruction(nullptr, Op, BB, R, TTI);
- }
- // Try to vectorize operands as vector bundles.
- for (Instruction *I : PostponedCmps) {
- if (R.isDeleted(I))
- continue;
- OpsChanged |= tryToVectorize(I, R);
- }
- Instructions.clear();
- } else {
- // Insert in reverse order since the PostponedCmps vector was filled in
- // reverse order.
- Instructions.assign(PostponedCmps.rbegin(), PostponedCmps.rend());
- }
- return OpsChanged;
-}
-
template <typename T>
static bool
tryToVectorizeSequence(SmallVectorImpl<T *> &Incoming,
@@ -9242,6 +9473,101 @@ tryToVectorizeSequence(SmallVectorImpl<T *> &Incoming,
return Changed;
}
+bool SLPVectorizerPass::vectorizeSimpleInstructions(
+ SmallVectorImpl<Instruction *> &Instructions, BasicBlock *BB, BoUpSLP &R,
+ bool AtTerminator) {
+ bool OpsChanged = false;
+ SmallVector<Instruction *, 4> PostponedCmps;
+ for (auto *I : reverse(Instructions)) {
+ if (R.isDeleted(I))
+ continue;
+ if (auto *LastInsertValue = dyn_cast<InsertValueInst>(I))
+ OpsChanged |= vectorizeInsertValueInst(LastInsertValue, BB, R);
+ else if (auto *LastInsertElem = dyn_cast<InsertElementInst>(I))
+ OpsChanged |= vectorizeInsertElementInst(LastInsertElem, BB, R);
+ else if (isa<CmpInst>(I))
+ PostponedCmps.push_back(I);
+ }
+ if (AtTerminator) {
+ // Try to find reductions first.
+ for (Instruction *I : PostponedCmps) {
+ if (R.isDeleted(I))
+ continue;
+ for (Value *Op : I->operands())
+ OpsChanged |= vectorizeRootInstruction(nullptr, Op, BB, R, TTI);
+ }
+ // Try to vectorize operands as vector bundles.
+ for (Instruction *I : PostponedCmps) {
+ if (R.isDeleted(I))
+ continue;
+ OpsChanged |= tryToVectorize(I, R);
+ }
+ // Try to vectorize the list of compares.
+ // Sort by type, compare predicate, etc.
+ // TODO: Add analysis on the operand opcodes (profitable to vectorize
+ // instructions with same/alternate opcodes/const values).
+ auto &&CompareSorter = [&R](Value *V, Value *V2) {
+ auto *CI1 = cast<CmpInst>(V);
+ auto *CI2 = cast<CmpInst>(V2);
+ if (R.isDeleted(CI2) || !isValidElementType(CI2->getType()))
+ return false;
+ if (CI1->getOperand(0)->getType()->getTypeID() <
+ CI2->getOperand(0)->getType()->getTypeID())
+ return true;
+ if (CI1->getOperand(0)->getType()->getTypeID() >
+ CI2->getOperand(0)->getType()->getTypeID())
+ return false;
+ return CI1->getPredicate() < CI2->getPredicate() ||
+ (CI1->getPredicate() > CI2->getPredicate() &&
+ CI1->getPredicate() <
+ CmpInst::getSwappedPredicate(CI2->getPredicate()));
+ };
+
+ auto &&AreCompatibleCompares = [&R](Value *V1, Value *V2) {
+ if (V1 == V2)
+ return true;
+ auto *CI1 = cast<CmpInst>(V1);
+ auto *CI2 = cast<CmpInst>(V2);
+ if (R.isDeleted(CI2) || !isValidElementType(CI2->getType()))
+ return false;
+ if (CI1->getOperand(0)->getType() != CI2->getOperand(0)->getType())
+ return false;
+ return CI1->getPredicate() == CI2->getPredicate() ||
+ CI1->getPredicate() ==
+ CmpInst::getSwappedPredicate(CI2->getPredicate());
+ };
+ auto Limit = [&R](Value *V) {
+ unsigned EltSize = R.getVectorElementSize(V);
+ return std::max(2U, R.getMaxVecRegSize() / EltSize);
+ };
+
+ SmallVector<Value *> Vals(PostponedCmps.begin(), PostponedCmps.end());
+ OpsChanged |= tryToVectorizeSequence<Value>(
+ Vals, Limit, CompareSorter, AreCompatibleCompares,
+ [this, &R](ArrayRef<Value *> Candidates, bool LimitForRegisterSize) {
+ // Exclude possible reductions from other blocks.
+ bool ArePossiblyReducedInOtherBlock =
+ any_of(Candidates, [](Value *V) {
+ return any_of(V->users(), [V](User *U) {
+ return isa<SelectInst>(U) &&
+ cast<SelectInst>(U)->getParent() !=
+ cast<Instruction>(V)->getParent();
+ });
+ });
+ if (ArePossiblyReducedInOtherBlock)
+ return false;
+ return tryToVectorizeList(Candidates, R, LimitForRegisterSize);
+ },
+ /*LimitForRegisterSize=*/true);
+ Instructions.clear();
+ } else {
+ // Insert in reverse order since the PostponedCmps vector was filled in
+ // reverse order.
+ Instructions.assign(PostponedCmps.rbegin(), PostponedCmps.rend());
+ }
+ return OpsChanged;
+}
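
The compare handling above follows the usual sort-then-batch pattern of tryToVectorizeSequence: order the candidates so compatible ones become neighbors, then hand maximal compatible runs to the list vectorizer. Below is a toy, non-LLVM sketch of that pattern with an invented Cmp record and keys; the real code additionally treats swapped predicates as compatible.

#include <algorithm>
#include <cassert>
#include <tuple>
#include <vector>

struct Cmp {
  int TypeID;    // stands in for the compared operands' type
  int Predicate; // stands in for the compare predicate
};

int main() {
  std::vector<Cmp> Cmps = {{1, 3}, {0, 2}, {1, 3}, {0, 2}, {1, 5}};
  // Sort so compares with the same type and predicate become neighbors.
  std::stable_sort(Cmps.begin(), Cmps.end(), [](const Cmp &A, const Cmp &B) {
    return std::tie(A.TypeID, A.Predicate) < std::tie(B.TypeID, B.Predicate);
  });
  // One sweep collects maximal runs of compatible compares; each run is what
  // would be handed to tryToVectorizeList.
  std::vector<std::vector<Cmp>> Bundles;
  for (const Cmp &C : Cmps) {
    if (Bundles.empty() || Bundles.back().front().TypeID != C.TypeID ||
        Bundles.back().front().Predicate != C.Predicate)
      Bundles.emplace_back();
    Bundles.back().push_back(C);
  }
  assert(Bundles.size() == 3); // {0,2} x2, {1,3} x2, {1,5} x1
  return 0;
}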
+
bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
bool Changed = false;
SmallVector<Value *, 4> Incoming;
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 638467f94e1c..44b5e1df0839 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -718,6 +718,8 @@ void VPInstruction::generateInstruction(VPTransformState &State,
void VPInstruction::execute(VPTransformState &State) {
assert(!State.Instance && "VPInstruction executing an Instance");
+ IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
+ State.Builder.setFastMathFlags(FMF);
for (unsigned Part = 0; Part < State.UF; ++Part)
generateInstruction(State, Part);
}
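
FastMathFlagGuard here is the usual RAII save/restore idiom: the builder's flags are set only for the duration of this recipe's code generation and put back when the scope ends, so per-recipe flags cannot leak into later recipes. A standalone sketch of the idiom on an invented ToyBuilder (not the LLVM API):

#include <cassert>

// Toy stand-in for a builder carrying a flag word.
struct ToyBuilder {
  unsigned FastMathFlags = 0;
};

// RAII guard: snapshots the flags on construction, restores them on scope exit.
class FlagGuard {
  ToyBuilder &B;
  unsigned Saved;

public:
  explicit FlagGuard(ToyBuilder &B) : B(B), Saved(B.FastMathFlags) {}
  ~FlagGuard() { B.FastMathFlags = Saved; }
};

int main() {
  ToyBuilder Builder;
  {
    FlagGuard Guard(Builder);
    Builder.FastMathFlags = 0x7f; // flags in effect only for this scope
    assert(Builder.FastMathFlags == 0x7f);
  }
  // Restored automatically when the guard is destroyed.
  assert(Builder.FastMathFlags == 0);
  return 0;
}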
@@ -760,6 +762,8 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
O << Instruction::getOpcodeName(getOpcode());
}
+ O << FMF;
+
for (const VPValue *Operand : operands()) {
O << " ";
Operand->printAsOperand(O, SlotTracker);
@@ -767,6 +771,16 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
}
#endif
+void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) {
+ // Make sure the VPInstruction is a floating-point operation.
+ assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
+ Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
+ Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
+ Opcode == Instruction::FCmp) &&
+ "this op can't take fast-math flags");
+ FMF = FMFNew;
+}
+
/// Generate the code inside the body of the vectorized loop. Assumes a single
/// LoopVectorBody basic-block was created for this. Introduce additional
/// basic-blocks as needed, and fill them all.
@@ -1196,8 +1210,10 @@ void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
printAsOperand(O, SlotTracker);
O << " = ";
getChainOp()->printAsOperand(O, SlotTracker);
- O << " + reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode())
- << " (";
+ O << " +";
+ if (isa<FPMathOperator>(getUnderlyingInstr()))
+ O << getUnderlyingInstr()->getFastMathFlags();
+ O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " (";
getVecOp()->printAsOperand(O, SlotTracker);
if (getCondOp()) {
O << ", ";
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h
index 00ee31007cb7..810dd5030f95 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -59,6 +59,7 @@ class Value;
class VPBasicBlock;
class VPRegionBlock;
class VPlan;
+class VPReplicateRecipe;
class VPlanSlp;
/// Returns a calculation for the total number of elements for a given \p VF.
@@ -346,6 +347,10 @@ struct VPTransformState {
/// Pointer to the VPlan code is generated for.
VPlan *Plan;
+
+ /// Holds recipes that may generate a poison value that is used after
+ /// vectorization, even when their operands are not poison.
+ SmallPtrSet<VPRecipeBase *, 16> MayGeneratePoisonRecipes;
};
/// VPUsers instance used by VPBlockBase to manage CondBit and the block
@@ -789,6 +794,7 @@ public:
private:
typedef unsigned char OpcodeTy;
OpcodeTy Opcode;
+ FastMathFlags FMF;
/// Utility method serving execute(): generates a single instance of the
/// modeled instruction.
@@ -802,13 +808,6 @@ public:
: VPRecipeBase(VPRecipeBase::VPInstructionSC, Operands),
VPValue(VPValue::VPVInstructionSC, nullptr, this), Opcode(Opcode) {}
- VPInstruction(unsigned Opcode, ArrayRef<VPInstruction *> Operands)
- : VPRecipeBase(VPRecipeBase::VPInstructionSC, {}),
- VPValue(VPValue::VPVInstructionSC, nullptr, this), Opcode(Opcode) {
- for (auto *I : Operands)
- addOperand(I->getVPSingleValue());
- }
-
VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands)
: VPInstruction(Opcode, ArrayRef<VPValue *>(Operands)) {}
@@ -870,6 +869,9 @@ public:
return true;
}
}
+
+ /// Set the fast-math flags.
+ void setFastMathFlags(FastMathFlags FMFNew);
};
/// VPWidenRecipe is a recipe for producing a copy of vector type its
@@ -1511,7 +1513,7 @@ public:
/// - For store: Address, stored value, optional mask
/// TODO: We currently execute only per-part unless a specific instance is
/// provided.
-class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
+class VPWidenMemoryInstructionRecipe : public VPRecipeBase, public VPValue {
Instruction &Ingredient;
// Whether the loaded-from / stored-to addresses are consecutive.
@@ -1533,10 +1535,10 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
public:
VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
bool Consecutive, bool Reverse)
- : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr}), Ingredient(Load),
+ : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr}),
+ VPValue(VPValue::VPVMemoryInstructionSC, &Load, this), Ingredient(Load),
Consecutive(Consecutive), Reverse(Reverse) {
assert((Consecutive || !Reverse) && "Reverse implies consecutive");
- new VPValue(VPValue::VPVMemoryInstructionSC, &Load, this);
setMask(Mask);
}
@@ -1544,6 +1546,7 @@ public:
VPValue *StoredValue, VPValue *Mask,
bool Consecutive, bool Reverse)
: VPRecipeBase(VPWidenMemoryInstructionSC, {Addr, StoredValue}),
+ VPValue(VPValue::VPVMemoryInstructionSC, &Store, this),
Ingredient(Store), Consecutive(Consecutive), Reverse(Reverse) {
assert((Consecutive || !Reverse) && "Reverse implies consecutive");
setMask(Mask);